-
Notifications
You must be signed in to change notification settings - Fork 28k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-10371][SQL] Implement subexpr elimination for UnsafeProjections
This patch adds the building blocks for code-generating subexpression elimination and implements it end to end for UnsafeProjection. The building blocks can be used to do the same thing for other operators. It introduces some utilities to compute common subexpressions. Expressions can be added to this data structure. The expr and its children will be recursively matched against existing expressions (ones previously added) and grouped into common groups. This is built using the existing `semanticEquals`. It does not understand things like commutative or associative expressions. This can be done as future work. After building this data structure, the codegen process takes advantage of it by: 1. Generating a helper function in the generated class that computes the common subexpression. This is done for all common subexpressions that have at least two occurrences and whose expression tree is sufficiently complex. 2. When generating the apply() function, if the helper function exists, calling that instead of regenerating the expression tree. Repeated calls to the helper function short-circuit the evaluation logic. Author: Nong Li <nong@databricks.com> Author: Nong Li <nongli@gmail.com> This patch had conflicts when merged, resolved by Committer: Michael Armbrust <michael@databricks.com> Closes #9480 from nongli/spark-10371. (cherry picked from commit 87aedc4) Signed-off-by: Michael Armbrust <michael@databricks.com>
- Loading branch information
Showing
11 changed files
with
523 additions
and
16 deletions.
There are no files selected for viewing
106 changes: 106 additions & 0 deletions
106
...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.catalyst.expressions | ||
|
||
import scala.collection.mutable | ||
|
||
/**
 * Groups (sub)expression trees by semantic equality. Expressions are added to an instance of
 * this class, which can subsequently be queried for the expressions equivalent to a given one.
 * Expression trees are considered equal if, for the same input(s), they produce the same result.
 */
class EquivalentExpressions {
  /**
   * Wrapper around an Expression that provides semantic equality, so that expressions can be
   * used as hash-map keys: equality delegates to `semanticEquals` and the hash code to
   * `semanticHash`.
   */
  case class Expr(e: Expression) {
    // Computed once at construction so that repeated map operations do not re-hash the tree.
    val hash = e.semanticHash()

    override def equals(o: Any): Boolean = o match {
      case other: Expr => e.semanticEquals(other.e)
      case _ => false
    }

    override def hashCode: Int = hash
  }

  // Maps a representative of each equivalence class to every expression added for that class
  // (in insertion order, including the first one).
  private val equivalenceMap: mutable.HashMap[Expr, mutable.MutableList[Expression]] =
    new mutable.HashMap[Expr, mutable.MutableList[Expression]]

  /**
   * Adds `expr` to this data structure, grouping it with any existing equivalent expressions.
   * Non-recursive: the children of `expr` are not visited.
   * Non-deterministic expressions are never recorded.
   *
   * @return true if an equivalent expression had already been added; false if `expr` started a
   *         new group or was not recorded because it is non-deterministic.
   */
  def addExpr(expr: Expression): Boolean = {
    if (!expr.deterministic) {
      false
    } else {
      val key = Expr(expr)
      equivalenceMap.get(key) match {
        case Some(group) =>
          group += expr
          true
        case None =>
          equivalenceMap.put(key, mutable.MutableList(expr))
          false
      }
    }
  }

  /**
   * Adds the expression to this data structure recursively. Stops descending when a matching
   * expression is found: if `root` has already been added, its children are not added.
   * Non-deterministic expressions are skipped together with their whole subtree.
   *
   * @param root the expression tree to add.
   * @param ignoreLeaf if true, leaf nodes are not added.
   */
  def addExprTree(root: Expression, ignoreLeaf: Boolean = true): Unit = {
    val skip = ignoreLeaf && root.isInstanceOf[LeafExpression]
    // The explicit determinism check is required here: addExpr also returns false for
    // non-deterministic expressions, and we must not recurse into their children.
    if (!skip && root.deterministic && !addExpr(root)) {
      root.children.foreach(addExprTree(_, ignoreLeaf))
    }
  }

  /**
   * Returns all of the expression trees that are equivalent to `e`. Returns
   * an empty collection if there are none.
   */
  def getEquivalentExprs(e: Expression): Seq[Expression] = {
    equivalenceMap.getOrElse(Expr(e), Seq.empty[Expression])
  }

  /**
   * Returns all the equivalent sets of expressions.
   */
  def getAllEquivalentExprs: Seq[Seq[Expression]] = {
    equivalenceMap.values.map(_.toSeq).toSeq
  }

  /**
   * Returns the state of the data structure as a string. If `all` is false, skips sets of
   * equivalent expressions with cardinality 1.
   */
  def debugString(all: Boolean = false): String = {
    val sb = new StringBuilder("Equivalent expressions:\n")
    equivalenceMap.foreach { case (_, group) =>
      if (all || group.length > 1) {
        sb.append(" ").append(group.mkString(", ")).append("\n")
      }
    }
    sb.toString()
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.