apache · zhztheplayer · May 10, 2024 · May 30, 2024
diff --git a/...nds-velox/src/main/scala/org/apache/spark/sql/execution/VeloxColumnarWriteFilesExec.scala b/...nds-velox/src/main/scala/org/apache/spark/sql/execution/VeloxColumnarWriteFilesExec.scala
@@ -34,6 +34,8 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericInternalRow}
 import org.apache.spark.sql.connector.write.WriterCommitMessage
 import org.apache.spark.sql.execution.datasources._
+import org.apache.spark.sql.execution.ui.{SparkPlanGraphEdge, SparkPlanGraphNodeWrapper, SparkPlanGraphWrapper}
+import org.apache.spark.sql.execution.ui.adjustment.PlanGraphAdjustment
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.Utils
@@ -270,9 +272,14 @@ case class VeloxColumnarWriteFilesExec private (
   extends BinaryExecNode
   with GlutenPlan
   with VeloxColumnarWriteFilesExec.ExecuteWriteCompatible {
+  import VeloxColumnarWriteFilesExec._
 
   val child: SparkPlan = left
 
+  // Make sure we hide the noop leaf from fallback report / SQL UI.
+  HideNoopLeafFromFallBackReport.ensureRegistered()
+  HideNoopLeafFromVeloxColumnarWriteFiles.ensureRegistered()
+
   override lazy val references: AttributeSet = AttributeSet.empty
 
   override def supportsColumnar(): Boolean = true
@@ -322,14 +329,14 @@ case class VeloxColumnarWriteFilesExec private (
       new VeloxColumnarWriteFilesRDD(rdd, writeFilesSpec, jobTrackerID)
     }
   }
+
   override protected def withNewChildrenInternal(
       newLeft: SparkPlan,
       newRight: SparkPlan): SparkPlan =
     copy(newLeft, newRight, fileFormat, partitionColumns, bucketSpec, options, staticPartitions)
 }
 
 object VeloxColumnarWriteFilesExec {
-
   def apply(
       child: SparkPlan,
       fileFormat: FileFormat,
@@ -373,10 +380,66 @@ object VeloxColumnarWriteFilesExec {
 
   sealed trait ExecuteWriteCompatible {
     // To be compatible with Spark (version < 3.4)
-    protected def doExecuteWrite(writeFilesSpec: WriteFilesSpec): RDD[WriterCommitMessage] = {
-      throw new GlutenException(
-        s"Internal Error ${this.getClass} has write support" +
-          s" mismatch:\n${this}")
+    protected def doExecuteWrite(writeFilesSpec: WriteFilesSpec): RDD[WriterCommitMessage]
+  }
+
+  // Hide the noop leaf from fallback reporting.
+  private object HideNoopLeafFromFallBackReport extends GlutenExplainUtils.HideFallbackReason {
+    /** A plan is hidden when it is a single-child chain whose only leaf is a NoopLeaf. */
+    override def shouldHide(plan: SparkPlan): Boolean = endsInSingleNoopLeaf(plan)
+
+    /**
+     * Descends through the plan one child at a time.
+     *   - more than one child at any level: false
+     *   - exactly one child: the answer is decided by that child
+     *   - no children: true only if the node itself is a NoopLeaf
+     */
+    private def endsInSingleNoopLeaf(plan: SparkPlan): Boolean = plan.children match {
+      case Seq() => plan.isInstanceOf[NoopLeaf]
+      case Seq(onlyChild) => endsInSingleNoopLeaf(onlyChild)
+      case _ => false
+    }
+  }
+
+  // Hide the noop leaf from SQL UI.
+  private object HideNoopLeafFromVeloxColumnarWriteFiles extends PlanGraphAdjustment {
+    /**
+     * Removes every "NoopLeaf" node plus the ancestors and edges found while walking up from it,
+     * stopping at the first "VeloxColumnarWriteFiles" ancestor (kept; only the edge into it is
+     * removed) or at the root. Returns `graph` itself when it contains no "NoopLeaf" node.
+     */
+    override def apply(graph: SparkPlanGraphWrapper): SparkPlanGraphWrapper = {
+      val noopLeaves = graph.nodes.filter(n => n.node != null && n.node.name == "NoopLeaf")
+      if (noopLeaves.isEmpty) {
+        return graph
+      }
+      val nodesToRemove = mutable.HashSet[SparkPlanGraphNodeWrapper]()
+      val edgesToRemove = mutable.HashSet[SparkPlanGraphEdge]()
+
+      // Climbs from `current` towards the root, marking each edge crossed and each ancestor below
+      // the write node. `budget` allows one step per graph node, so the walk always terminates.
+      // Plain recursion avoids the exception-based non-local `return` from inside a `for` body.
+      def climb(current: SparkPlanGraphNodeWrapper, budget: Int): Unit = {
+        if (budget > 0) {
+          findParent(graph, current).foreach {
+            case (parent, edge) =>
+              edgesToRemove += edge
+              if (parent.node.name != "VeloxColumnarWriteFiles") {
+                nodesToRemove += parent
+                climb(parent, budget - 1)
+              }
+          }
+        }
+      }
+      nodesToRemove ++= noopLeaves
+      noopLeaves.foreach(climb(_, graph.nodes.size))
+
+      // Filter the original sequences instead of round-tripping through a Set, so the surviving
+      // nodes and edges keep their relative order (`distinct` keeps the old de-duplication).
+      new SparkPlanGraphWrapper(
+        graph.executionId,
+        graph.nodes.distinct.filterNot(nodesToRemove.contains),
+        graph.edges.distinct.filterNot(edgesToRemove.contains))
     }
   }
 }
diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/GlutenExplainUtils.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/GlutenExplainUtils.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec}
 
 import java.util
 import java.util.Collections.newSetFromMap
+import java.util.concurrent.atomic.AtomicBoolean
 
 import scala.collection.mutable
 import scala.collection.mutable.{ArrayBuffer, BitSet}
@@ -103,6 +104,7 @@ object GlutenExplainUtils extends AdaptiveSparkPlanHelper {
             addFallbackNodeWithReason(i, "Columnar table cache is disabled", fallbackNodeToReason)
           }
         case _: AQEShuffleReadExec => // Ignore
+        case p: SparkPlan if exclusions.exists(_.shouldHide(p)) =>
         case p: SparkPlan =>
           handleVanillaSparkPlan(p, fallbackNodeToReason)
           p.innerChildren.foreach(collect)
@@ -113,6 +115,22 @@ object GlutenExplainUtils extends AdaptiveSparkPlanHelper {
     (numGlutenNodes, fallbackNodeToReason.toMap)
   }
 
+  // NOTE(review): appends lock `exclusions`; the `exclusions.exists` read appears not to.
+  private val exclusions: mutable.ListBuffer[HideFallbackReason] = mutable.ListBuffer()
+  trait HideFallbackReason {
+    private val alreadyRegistered: AtomicBoolean = new AtomicBoolean(false)
+
+    /** Adds this instance to `exclusions`; only the first call has any effect. */
+    final def ensureRegistered(): Unit = {
+      if (alreadyRegistered.compareAndSet(false, true)) {
+        exclusions.synchronized {
+          exclusions += this
+        }
+      }
+    }
+    def shouldHide(plan: SparkPlan): Boolean
+  }
+
   /**
    * Given a input physical plan, performs the following tasks.
    *   1. Generate the two part explain output for this plan.

diff --git a/gluten-ui/src/main/scala/org/apache/spark/sql/execution/ui/GlutenSQLAppStatusListener.scala b/gluten-ui/src/main/scala/org/apache/spark/sql/execution/ui/GlutenSQLAppStatusListener.scala
@@ -21,6 +21,7 @@ import org.apache.gluten.events.{GlutenBuildInfoEvent, GlutenPlanFallbackEvent}
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler._
+import org.apache.spark.sql.execution.ui.adjustment.PlanGraphAdjustment
 import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.status.{ElementTrackingStore, KVUtils}
 
@@ -79,6 +80,7 @@ class GlutenSQLAppStatusListener(conf: SparkConf, kvstore: ElementTrackingStore)
   }
 
   private def onSQLExtensionEnd(event: SparkListenerSQLExecutionEnd): Unit = {
+    PlanGraphAdjustment.adjust(kvstore, event.executionId) // apply registered graph adjustments
     executionIdToDescription.remove(event.executionId)
     executionIdToFallbackEvent.remove(event.executionId)
   }

diff --git a/...-ui/src/main/scala/org/apache/spark/sql/execution/ui/adjustment/PlanGraphAdjustment.scala b/...-ui/src/main/scala/org/apache/spark/sql/execution/ui/adjustment/PlanGraphAdjustment.scala
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.ui.adjustment
+
+import org.apache.spark.sql.execution.ui.{SparkPlanGraphEdge, SparkPlanGraphNodeWrapper, SparkPlanGraphWrapper}
+import org.apache.spark.sql.execution.ui.adjustment.PlanGraphAdjustment.register
+import org.apache.spark.status.ElementTrackingStore
+
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.collection.mutable
+import scala.collection.mutable.ListBuffer
+
+trait PlanGraphAdjustment {
+  private val alreadyRegistered: AtomicBoolean = new AtomicBoolean(false)
+
+  /** Passes this adjustment to the companion's `register`; only the first call does so. */
+  final def ensureRegistered(): Unit = {
+    if (alreadyRegistered.compareAndSet(false, true)) {
+      register(this)
+    }
+  }
+
+  /**
+   * Lists the children of `node`: one (child, edge) pair for each edge whose `toId` is the id
+   * of `node` and whose `fromId` matches a wrapper in `graph.nodes` with a non-null `node`.
+   * Edges whose source cannot be resolved that way are skipped.
+   */
+  final protected def findChildren(
+      graph: SparkPlanGraphWrapper,
+      node: SparkPlanGraphNodeWrapper): Seq[(SparkPlanGraphNodeWrapper, SparkPlanGraphEdge)] = {
+    val parentId = node.node.id
+    val plainNodes = graph.nodes.filter(_.node != null)
+    graph.edges
+      .filter(_.toId == parentId)
+      .flatMap(edge => plainNodes.find(_.node.id == edge.fromId).map(child => (child, edge)))
+  }
+
+  /**
+   * Finds the parent of `node`: the target of an edge whose `fromId` is the id of `node`,
+   * paired with that edge. None when no such edge resolves to a wrapper with a non-null `node`.
+   *
+   * @throws IllegalStateException if more than one parent is found
+   */
+  final protected def findParent(
+      graph: SparkPlanGraphWrapper,
+      node: SparkPlanGraphNodeWrapper): Option[(SparkPlanGraphNodeWrapper, SparkPlanGraphEdge)] = {
+    val childId = node.node.id
+    val plainNodes = graph.nodes.filter(_.node != null)
+    val parents = graph.edges
+      .filter(_.fromId == childId)
+      .flatMap(edge => plainNodes.find(_.node.id == edge.toId).map(parent => (parent, edge)))
+    parents match {
+      case Seq() => None
+      case Seq(only) => Some(only)
+      case _ =>
+        throw new IllegalStateException("Node has multiple parents, it should not happen: " + node)
+    }
+  }
+
+  /** Produces the adjusted graph derived from `from`. */
+  def apply(from: SparkPlanGraphWrapper): SparkPlanGraphWrapper
+}
+
+object PlanGraphAdjustment {
+  private val adjustments: ListBuffer[PlanGraphAdjustment] = mutable.ListBuffer()
+
+  private[ui] def register(adjustment: PlanGraphAdjustment): Unit =
+    synchronized(adjustments += adjustment)
+
+  /** Applies every registered adjustment to the graph stored for `executionId`, if present. */
+  private[ui] def adjust(kvstore: ElementTrackingStore, executionId: Long): Unit = {
+    val stored = try Some(kvstore.read(classOf[SparkPlanGraphWrapper], executionId))
+    catch { case _: NoSuchElementException => None } // read throws when the key is absent
+    for (graph <- stored) {
+      // Fold over a copy taken under the same lock `register` holds while appending.
+      val out = synchronized(adjustments.toList).foldLeft(graph)((g, a) => a.apply(g))
+      assert(out.executionId == executionId)
+      if (out ne graph) kvstore.write(out) // write overwrites the same key; no delete needed
+    }
+  }
+}