From 1e1e4418003cc13b0e2a118a315f6aa2422c3f14 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Fri, 27 May 2016 11:54:36 -0700
Subject: [PATCH 1/2] Improve the aggregate output.

---
 .../sql/catalyst/expressions/Expression.scala |  3 +++
 .../spark/sql/catalyst/plans/QueryPlan.scala  |  2 ++
 .../spark/sql/catalyst/trees/TreeNode.scala   | 23 ++++++++++++++-----
 .../spark/sql/execution/QueryExecution.scala  | 16 +++++++------
 .../sql/execution/WholeStageCodegenExec.scala |  6 +++--
 .../aggregate/HashAggregateExec.scala         | 12 ++++++++--
 .../aggregate/SortAggregateExec.scala         | 12 ++++++++--
 7 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 2ec46216e1cdb..6a085dc65365c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -190,6 +190,9 @@ abstract class Expression extends TreeNode[Expression] {
     case single => single :: Nil
   }
 
+  // Marks this as final, verboseString of Expression is NEVER called.
+  final override def verboseString: String = simpleString
+
   override def simpleString: String = toString
 
   override def toString: String = prettyName + flatArguments.mkString("(", ", ", ")")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 19a66cff4fae4..cf34f4b30d8d8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -257,6 +257,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
 
   override def simpleString: String = statePrefix + super.simpleString
 
+  override def verboseString: String = simpleString
+
   /**
    * All the subqueries of current plan.
    */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 22d82c61080c0..c67366d240a30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -462,10 +462,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
   /** ONE line description of this node. */
   def simpleString: String = s"$nodeName $argString".trim
 
+  /** ONE line description of this node with more information */
+  def verboseString: String
+
   override def toString: String = treeString
 
   /** Returns a string representation of the nodes in this tree */
-  def treeString: String = generateTreeString(0, Nil, new StringBuilder).toString
+  def treeString: String = treeString(verbose = true)
+
+  def treeString(verbose: Boolean): String = {
+    generateTreeString(0, Nil, new StringBuilder, verbose).toString
+  }
 
   /**
    * Returns a string representation of the nodes in this tree, where each operator is numbered.
@@ -508,6 +515,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
     if (depth > 0) {
       lastChildren.init.foreach { isLast =>
@@ -520,18 +528,21 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
     }
 
     builder.append(prefix)
-    builder.append(simpleString)
+    val headline = if (verbose) verboseString else simpleString
+    builder.append(headline)
     builder.append("\n")
 
     if (innerChildren.nonEmpty) {
       innerChildren.init.foreach(_.generateTreeString(
-        depth + 2, lastChildren :+ false :+ false, builder))
-      innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false :+ true, builder)
+        depth + 2, lastChildren :+ false :+ false, builder, verbose))
+      innerChildren.last.generateTreeString(
+        depth + 2, lastChildren :+ false :+ true, builder, verbose)
     }
 
     if (children.nonEmpty) {
-      children.init.foreach(_.generateTreeString(depth + 1, lastChildren :+ false, builder, prefix))
-      children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, prefix)
+      children.init.foreach(
+        _.generateTreeString(depth + 1, lastChildren :+ false, builder, verbose, prefix))
+      children.last.generateTreeString(depth + 1, lastChildren :+ true, builder, verbose, prefix)
     }
 
     builder
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index 330459c11ea98..d79b1d8ee21d6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -201,24 +201,26 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
 
   def simpleString: String = {
     s"""== Physical Plan ==
-       |${stringOrError(executedPlan)}
+       |${stringOrError(executedPlan.treeString(verbose = false))}
       """.stripMargin.trim
   }
 
   override def toString: String = {
     def output = analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ")
 
-    val analyzedPlan =
-      Seq(stringOrError(output), stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n")
+    val analyzedPlan = Seq(
+      stringOrError(output),
+      stringOrError(analyzed.treeString(verbose = true))
+    ).filter(_.nonEmpty).mkString("\n")
 
     s"""== Parsed Logical Plan ==
-       |${stringOrError(logical)}
+       |${stringOrError(logical.treeString(verbose = true))}
        |== Analyzed Logical Plan ==
-       |$analyzedPlan
+       |${analyzedPlan}
        |== Optimized Logical Plan ==
-       |${stringOrError(optimizedPlan)}
+       |${stringOrError(optimizedPlan.treeString(verbose = true))}
        |== Physical Plan ==
-       |${stringOrError(executedPlan)}
+       |${stringOrError(executedPlan.treeString(verbose = true))}
       """.stripMargin.trim
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index d3e8d4e8e41a2..e0d8e35713291 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -250,8 +250,9 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupp
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
-    child.generateTreeString(depth, lastChildren, builder, "")
+    child.generateTreeString(depth, lastChildren, builder, verbose, "")
   }
 }
 
@@ -407,8 +408,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co
       depth: Int,
       lastChildren: Seq[Boolean],
       builder: StringBuilder,
+      verbose: Boolean,
       prefix: String = ""): StringBuilder = {
-    child.generateTreeString(depth, lastChildren, builder, "*")
+    child.generateTreeString(depth, lastChildren, builder, verbose, "*")
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index f270ca07554f5..b617e26418c7e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -764,7 +764,11 @@ case class HashAggregateExec(
     """
   }
 
-  override def simpleString: String = {
+  override def verboseString: String = toString(verbose = true)
+
+  override def simpleString: String = toString(verbose = false)
+
+  private def toString(verbose: Boolean): String = {
     val allAggregateExpressions = aggregateExpressions
 
     testFallbackStartsAt match {
@@ -772,7 +776,11 @@ case class HashAggregateExec(
       val keyString = groupingExpressions.mkString("[", ",", "]")
       val functionString = allAggregateExpressions.mkString("[", ",", "]")
       val outputString = output.mkString("[", ",", "]")
-      s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+      if (verbose) {
+        s"HashAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+      } else {
+        s"HashAggregate(key=$keyString, functions=$functionString)"
+      }
     case Some(fallbackStartsAt) =>
       s"HashAggregateWithControlledFallback $groupingExpressions " +
         s"$allAggregateExpressions $resultExpressions fallbackStartsAt=$fallbackStartsAt"
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
index 9e48ff8d707bd..41ba9f5b3feda 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
@@ -103,12 +103,20 @@ case class SortAggregateExec(
     }
   }
 
-  override def simpleString: String = {
+  override def simpleString: String = toString(verbose = false)
+
+  override def verboseString: String = toString(verbose = true)
+
+  private def toString(verbose: Boolean): String = {
     val allAggregateExpressions = aggregateExpressions
 
     val keyString = groupingExpressions.mkString("[", ",", "]")
     val functionString = allAggregateExpressions.mkString("[", ",", "]")
    val outputString = output.mkString("[", ",", "]")
-    s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+    if (verbose) {
+      s"SortAggregate(key=$keyString, functions=$functionString, output=$outputString)"
+    } else {
+      s"SortAggregate(key=$keyString, functions=$functionString)"
+    }
   }
 }

From 1ed295f12b3da10c0a2ffed69d19ed1ba2f0e010 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Mon, 6 Jun 2016 17:01:09 -0700
Subject: [PATCH 2/2] on liancheng's comment

---
 .../org/apache/spark/sql/catalyst/expressions/Expression.scala | 3 ++-
 .../scala/org/apache/spark/sql/execution/QueryExecution.scala  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 6a085dc65365c..efe592d69ddc8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -190,7 +190,8 @@ abstract class Expression extends TreeNode[Expression] {
     case single => single :: Nil
   }
 
-  // Marks this as final, verboseString of Expression is NEVER called.
+  // Marks this as final, Expression.verboseString should never be called, and thus shouldn't be
+  // overridden by concrete classes.
   final override def verboseString: String = simpleString
 
   override def simpleString: String = toString
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index d79b1d8ee21d6..560214a65e6ec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -216,7 +216,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
     s"""== Parsed Logical Plan ==
        |${stringOrError(logical.treeString(verbose = true))}
        |== Analyzed Logical Plan ==
-       |${analyzedPlan}
+       |$analyzedPlan
        |== Optimized Logical Plan ==
        |${stringOrError(optimizedPlan.treeString(verbose = true))}
        |== Physical Plan ==
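Note: the Scala snippet below is an illustration only, not part of the patch series. It is a
minimal sketch of how the new verbose flag surfaces to users, assuming an existing SparkSession
named spark; the variable names and the grouping column are made up, and the exact operator text
depends on the Spark build.

  // Build a tiny aggregation query (hypothetical example data).
  val df = spark.range(100).toDF("id").groupBy("id").count()

  // Compact plan string, roughly what df.explain() shows after this change:
  // HashAggregate/SortAggregate print key=... and functions=..., but omit output=[...].
  println(df.queryExecution.executedPlan.treeString(verbose = false))

  // Verbose plan string, as used by QueryExecution.toString: output=[...] is kept.
  println(df.queryExecution.executedPlan.treeString(verbose = true))

The net effect of the two commits is that the compact form drops the often very long output=[...]
attribute list from aggregate operators, while the verbose form used in the full
== Parsed/Analyzed/Optimized/Physical Plan == dump keeps it.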