From 153e170fe5a478d04559d430f566478a6e48528f Mon Sep 17 00:00:00 2001 From: Hiroshi Inoue Date: Fri, 1 Jul 2016 02:25:24 +0900 Subject: [PATCH 1/2] add check of # children to avoid redundant iteration in transformChildren --- .../spark/sql/catalyst/trees/TreeNode.scala | 83 ++++++++++--------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 072445af4f41f..4bc7438ea9ba8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -315,25 +315,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { protected def transformChildren( rule: PartialFunction[BaseType, BaseType], nextOperation: (BaseType, PartialFunction[BaseType, BaseType]) => BaseType): BaseType = { - var changed = false - val newArgs = mapProductIterator { - case arg: TreeNode[_] if containsChild(arg) => - val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) - if (!(newChild fastEquals arg)) { - changed = true - newChild - } else { - arg - } - case Some(arg: TreeNode[_]) if containsChild(arg) => - val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) - if (!(newChild fastEquals arg)) { - changed = true - Some(newChild) - } else { - Some(arg) - } - case m: Map[_, _] => m.mapValues { + if (children.nonEmpty) { + var changed = false + val newArgs = mapProductIterator { case arg: TreeNode[_] if containsChild(arg) => val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) if (!(newChild fastEquals arg)) { @@ -342,33 +326,54 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { } else { arg } - case other => other - }.view.force // `mapValues` is lazy and we need to force it to materialize - case d: DataType => d // Avoid unpacking Structs - case args: Traversable[_] => args.map { - case arg: TreeNode[_] if containsChild(arg) => + case Some(arg: TreeNode[_]) if containsChild(arg) => val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) if (!(newChild fastEquals arg)) { changed = true - newChild + Some(newChild) } else { - arg + Some(arg) } - case tuple @ (arg1: TreeNode[_], arg2: TreeNode[_]) => - val newChild1 = nextOperation(arg1.asInstanceOf[BaseType], rule) - val newChild2 = nextOperation(arg2.asInstanceOf[BaseType], rule) - if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) { - changed = true - (newChild1, newChild2) - } else { - tuple - } - case other => other + case m: Map[_, _] => m.mapValues { + case arg: TreeNode[_] if containsChild(arg) => + val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) + if (!(newChild fastEquals arg)) { + changed = true + newChild + } else { + arg + } + case other => other + }.view.force // `mapValues` is lazy and we need to force it to materialize + case d: DataType => d // Avoid unpacking Structs + case args: Traversable[_] => args.map { + case arg: TreeNode[_] if containsChild(arg) => + val newChild = nextOperation(arg.asInstanceOf[BaseType], rule) + if (!(newChild fastEquals arg)) { + changed = true + newChild + } else { + arg + } + case tuple@(arg1: TreeNode[_], arg2: TreeNode[_]) => + val newChild1 = nextOperation(arg1.asInstanceOf[BaseType], rule) + val newChild2 = nextOperation(arg2.asInstanceOf[BaseType], rule) + if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) { + changed = true + (newChild1, newChild2) + } else { + tuple + } + case other => other + } + case nonChild: AnyRef => nonChild + case null => null } - case nonChild: AnyRef => nonChild - case null => null + if (changed) makeCopy(newArgs) else this + } + else { + this } - if (changed) makeCopy(newArgs) else this } /** From 639090d8ab6576106b38d7c4108af0e924bb673f Mon Sep 17 00:00:00 2001 From: Hiroshi Inoue Date: Fri, 1 Jul 2016 10:06:05 +0900 Subject: [PATCH 2/2] coding style fix --- .../scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 4bc7438ea9ba8..8bce404735785 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -370,8 +370,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { case null => null } if (changed) makeCopy(newArgs) else this - } - else { + } else { this } }