Skip to content

Commit

Permalink
Revert "[SPARK-18016][SQL][CATALYST][BRANCH-2.2] Code Generation: Constant Pool Limit - Class Splitting"
Browse files Browse the repository at this point in the history

This reverts commit 198e3a0.
  • Loading branch information
cloud-fan committed Jun 23, 2017
1 parent d625734 commit b99c0e9
Show file tree
Hide file tree
Showing 21 changed files with 79 additions and 248 deletions.
7 changes: 0 additions & 7 deletions sql/catalyst/pom.xml
Expand Up @@ -131,13 +131,6 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<argLine>-Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
</configuration>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
Expand Down
Expand Up @@ -988,7 +988,7 @@ case class ScalaUDF(
val converterTerm = ctx.freshName("converter")
val expressionIdx = ctx.references.size - 1
ctx.addMutableState(converterClassName, converterTerm,
s"$converterTerm = ($converterClassName)$typeConvertersClassName" +
s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" +
s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" +
s"references[$expressionIdx]).getChildren().apply($index))).dataType());")
converterTerm
Expand All @@ -1005,7 +1005,7 @@ case class ScalaUDF(
// Generate codes used to convert the returned value of user-defined functions to Catalyst type
val catalystConverterTerm = ctx.freshName("catalystConverter")
ctx.addMutableState(converterClassName, catalystConverterTerm,
s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" +
s".createToCatalystConverter($scalaUDF.dataType());")

val resultTerm = ctx.freshName("result")
Expand All @@ -1019,7 +1019,7 @@ case class ScalaUDF(

val funcTerm = ctx.freshName("udf")
ctx.addMutableState(funcClassName, funcTerm,
s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")
s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();")

// codegen for children expressions
val evals = children.map(_.genCode(ctx))
Expand Down
Expand Up @@ -113,7 +113,7 @@ class CodegenContext {
val idx = references.length
references += obj
val clsName = Option(className).getOrElse(obj.getClass.getName)
addMutableState(clsName, term, s"$term = ($clsName) references[$idx];")
addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];")
term
}

Expand Down Expand Up @@ -202,6 +202,16 @@ class CodegenContext {
partitionInitializationStatements.mkString("\n")
}

/**
* Holds all the functions that will be added to the generated class, keyed by function name;
* each value is the source code of that function.
*/
val addedFunctions: mutable.Map[String, String] =
mutable.Map.empty[String, String]

/**
* Registers `funcCode` under `funcName` in `addedFunctions`. Registering the same name again
* replaces the code previously stored for it.
*
* @param funcName the name of the function
* @param funcCode the source code of the function
*/
def addNewFunction(funcName: String, funcCode: String): Unit = {
  addedFunctions(funcName) = funcCode
}

/**
* Holds expressions that are equivalent. Used to perform subexpression elimination
* during codegen.
Expand All @@ -223,118 +233,10 @@ class CodegenContext {
// The collection of sub-expression result resetting methods that need to be called on each row.
val subexprFunctions = mutable.ArrayBuffer.empty[String]

// Placeholder name used as the key for the outermost generated class in the collections below.
private val outerClassName = "OuterClass"

/**
* Holds the class and instance names to be generated, where `OuterClass` is a placeholder
* standing for whichever class is generated as the outermost class and which will contain any
* nested sub-classes. All other classes and instance names in this list will represent private,
* nested sub-classes.
*
* The `OuterClass` entry carries a `null` instance name. `addClass` prepends new entries, so the
* head of this list is always the most recently added class.
*/
private val classes: mutable.ListBuffer[(String, String)] =
mutable.ListBuffer[(String, String)](outerClassName -> null)

// A map holding the current size of each class to be generated.
// NOTE(review): the size is described as bytes, but `addNewFunction` accumulates
// `funcCode.length` (characters of source code), so it is an approximation.
private val classSize: mutable.Map[String, Int] =
mutable.Map[String, Int](outerClassName -> 0)

// Nested maps: class name -> (function name -> function code) for each class to be generated.
private val classFunctions: mutable.Map[String, mutable.Map[String, String]] =
mutable.Map(outerClassName -> mutable.Map.empty[String, String])

// Returns the recorded size of the most recently added class (the head of `classes`).
private def currClassSize(): Int = classSize(classes.head._1)

// Returns the (class name, instance name) pair of the most recently added class.
private def currClass(): (String, String) = classes.head

// Adds a new class. Requires the class' name, and its instance name.
private def addClass(className: String, classInstance: String): Unit = {
classes.prepend(className -> classInstance)
classSize += className -> 0
classFunctions += className -> mutable.Map.empty[String, String]
/**
* Returns the source code of every function registered in `addedFunctions`, joined with
* newlines. Only the code is emitted; the function names used as keys are not.
*/
def declareAddedFunctions(): String = {
  addedFunctions.values.mkString("\n")
}

/**
* Adds a function to the generated code. If the code for the current class has grown too large,
* the function is placed into a new private, nested class and an instance-qualified name for
* the function is returned. If the function ends up in the `OuterClass`, the simple `funcName`
* is returned.
*
* @param funcName the class-unqualified name of the function
* @param funcCode the body of the function
* @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This
*                           can be necessary when a function is declared outside of the context
*                           in which it is eventually referenced, so that a returned qualified
*                           function name could not otherwise be accessed.
* @return the name of the function, qualified by the instance name of the nested sub-class if
*         the function was placed into one
*/
def addNewFunction(
    funcName: String,
    funcCode: String,
    inlineToOuterClass: Boolean = false): String = {
  // The number of named constants that can exist in a class is limited by the Constant Pool
  // limit, 65,536. How many constants a class will need cannot be known here, so a threshold of
  // 1600k on the accumulated code size decides when to start a new private, nested sub-class.
  val target: (String, String) =
    if (inlineToOuterClass) {
      (outerClassName, "")
    } else if (currClassSize() <= 1600000) {
      currClass()
    } else {
      val nestedName = freshName("NestedClass")
      val nestedInstance = freshName("nestedClassInstance")
      addClass(nestedName, nestedInstance)
      (nestedName, nestedInstance)
    }
  val (className, classInstance) = target

  // Record the function under its owning class and account for its size.
  classFunctions(className) += funcName -> funcCode
  classSize(className) += funcCode.length

  // Functions in nested classes must be called through the nested class instance.
  if (className != outerClassName) s"$classInstance.$funcName" else funcName
}

/**
* Builds the field declarations that instantiate every nested, private sub-class inside the
* `OuterClass`, one declaration per line.
*/
private[sql] def initNestedClasses(): String = {
  // Nested, private sub-classes carry no mutable state of their own (they only reference the
  // outer class' mutable state), so each one is declared and initialized inline.
  val declarations = classes.collect {
    case (className, classInstance) if className != outerClassName =>
      s"private $className $classInstance = new $className();"
  }
  declarations.mkString("\n")
}

/**
* Returns the code of all functions registered for the `OuterClass`, joined with newlines.
*/
private[sql] def declareAddedFunctions(): String = {
  val outerFunctions = classFunctions(outerClassName)
  outerFunctions.valuesIterator.mkString("\n")
}

/**
* Declares all nested, private sub-classes and the function code that should be inlined to them.
*
* Every entry of `classFunctions` other than the `OuterClass` one is rendered as a
* `private class` declaration whose body is that entry's function code joined with newlines.
* The rendered declarations are then joined with newlines; note that the final `mkString` is
* applied to the value of the method's body block.
*
* @return the source code of all nested sub-class declarations
*/
private[sql] def declareNestedClasses(): String = {
classFunctions.filterKeys(_ != outerClassName).map {
case (className, functions) =>
s"""
|private class $className {
| ${functions.values.mkString("\n")}
|}
""".stripMargin
}
}.mkString("\n")

final val JAVA_BOOLEAN = "boolean"
final val JAVA_BYTE = "byte"
final val JAVA_SHORT = "short"
Expand Down Expand Up @@ -654,7 +556,8 @@ class CodegenContext {
return 0;
}
"""
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
addNewFunction(compareFunc, funcCode)
s"this.$compareFunc($c1, $c2)"
case schema: StructType =>
val comparisons = GenerateOrdering.genComparisons(this, schema)
val compareFunc = freshName("compareStruct")
Expand All @@ -670,7 +573,8 @@ class CodegenContext {
return 0;
}
"""
s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)"
addNewFunction(compareFunc, funcCode)
s"this.$compareFunc($c1, $c2)"
case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)"
case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2)
case _ =>
Expand Down Expand Up @@ -785,6 +689,7 @@ class CodegenContext {
|}
""".stripMargin
addNewFunction(name, code)
name
}

foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
Expand Down Expand Up @@ -868,6 +773,8 @@ class CodegenContext {
|}
""".stripMargin

addNewFunction(fnName, fn)

// Add a state and a mapping of the common subexpressions that are associated with this
// state. Adding this expression to subExprEliminationExprMap means it will call `fn`
// when it is code generated. This decision should be a cost based one.
Expand All @@ -885,7 +792,7 @@ class CodegenContext {
addMutableState(javaType(expr.dataType), value,
s"$value = ${defaultValue(expr.dataType)};")

subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
subexprFunctions += s"$fnName($INPUT_ROW);"
val state = SubExprEliminationState(isNull, value)
e.foreach(subExprEliminationExprs.put(_, state))
}
Expand Down
Expand Up @@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
if (e.nullable) {
val isNull = s"isNull_$i"
val value = s"value_$i"
ctx.addMutableState("boolean", isNull, s"$isNull = true;")
ctx.addMutableState("boolean", isNull, s"this.$isNull = true;")
ctx.addMutableState(ctx.javaType(e.dataType), value,
s"$value = ${ctx.defaultValue(e.dataType)};")
s"this.$value = ${ctx.defaultValue(e.dataType)};")
s"""
${ev.code}
$isNull = ${ev.isNull};
$value = ${ev.value};
this.$isNull = ${ev.isNull};
this.$value = ${ev.value};
"""
} else {
val value = s"value_$i"
ctx.addMutableState(ctx.javaType(e.dataType), value,
s"$value = ${ctx.defaultValue(e.dataType)};")
s"this.$value = ${ctx.defaultValue(e.dataType)};")
s"""
${ev.code}
$value = ${ev.value};
this.$value = ${ev.value};
"""
}
}
Expand All @@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP

val updates = validExpr.zip(index).map {
case (e, i) =>
val ev = ExprCode("", s"isNull_$i", s"value_$i")
val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i")
ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable)
}

Expand Down Expand Up @@ -135,9 +135,6 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
$allUpdates
return mutableRow;
}

${ctx.initNestedClasses()}
${ctx.declareNestedClasses()}
}
"""

Expand Down
Expand Up @@ -179,9 +179,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
$comparisons
return 0;
}

${ctx.initNestedClasses()}
${ctx.declareNestedClasses()}
}"""

val code = CodeFormatter.stripOverlappingComments(
Expand Down
Expand Up @@ -72,9 +72,6 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] {
${eval.code}
return !${eval.isNull} && ${eval.value};
}

${ctx.initNestedClasses()}
${ctx.declareNestedClasses()}
}"""

val code = CodeFormatter.stripOverlappingComments(
Expand Down
Expand Up @@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
val output = ctx.freshName("safeRow")
val values = ctx.freshName("values")
// These expressions could be split into multiple functions
ctx.addMutableState("Object[]", values, s"$values = null;")
ctx.addMutableState("Object[]", values, s"this.$values = null;")

val rowClass = classOf[GenericInternalRow].getName

Expand All @@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
val allFields = ctx.splitExpressions(tmp, fieldWriters)
val code = s"""
final InternalRow $tmp = $input;
$values = new Object[${schema.length}];
this.$values = new Object[${schema.length}];
$allFields
final InternalRow $output = new $rowClass($values);
$values = null;
this.$values = null;
"""

ExprCode(code, "false", output)
Expand Down Expand Up @@ -184,9 +184,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection]
$allExpressions
return mutableRow;
}

${ctx.initNestedClasses()}
${ctx.declareNestedClasses()}
}
"""

Expand Down
Expand Up @@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val rowWriterClass = classOf[UnsafeRowWriter].getName
val rowWriter = ctx.freshName("rowWriter")
ctx.addMutableState(rowWriterClass, rowWriter,
s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")
s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});")

val resetWriter = if (isTopLevel) {
// For top level row writer, it always writes to the beginning of the global buffer holder,
Expand Down Expand Up @@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val arrayWriterClass = classOf[UnsafeArrayWriter].getName
val arrayWriter = ctx.freshName("arrayWriter")
ctx.addMutableState(arrayWriterClass, arrayWriter,
s"$arrayWriter = new $arrayWriterClass();")
s"this.$arrayWriter = new $arrayWriterClass();")
val numElements = ctx.freshName("numElements")
val index = ctx.freshName("index")
val element = ctx.freshName("element")
Expand Down Expand Up @@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
val holder = ctx.freshName("holder")
val holderClass = classOf[BufferHolder].getName
ctx.addMutableState(holderClass, holder,
s"$holder = new $holderClass($result, ${numVarLenFields * 32});")
s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});")

val resetBufferHolder = if (numVarLenFields == 0) {
""
Expand Down Expand Up @@ -402,9 +402,6 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro
${eval.code.trim}
return ${eval.value};
}

${ctx.initNestedClasses()}
${ctx.declareNestedClasses()}
}
"""

Expand Down
Expand Up @@ -93,7 +93,7 @@ private [sql] object GenArrayData {
if (!ctx.isPrimitiveType(elementType)) {
val genericArrayClass = classOf[GenericArrayData].getName
ctx.addMutableState("Object[]", arrayName,
s"$arrayName = new Object[${numElements}];")
s"this.$arrayName = new Object[${numElements}];")

val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
val isNullAssignment = if (!isMapKey) {
Expand Down Expand Up @@ -340,7 +340,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val rowClass = classOf[GenericInternalRow].getName
val values = ctx.freshName("values")
ctx.addMutableState("Object[]", values, s"$values = null;")
ctx.addMutableState("Object[]", values, s"this.$values = null;")

ev.copy(code = s"""
$values = new Object[${valExprs.size}];""" +
Expand All @@ -357,7 +357,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
}) +
s"""
final InternalRow ${ev.value} = new $rowClass($values);
$values = null;
this.$values = null;
""", isNull = "false")
}

Expand Down

0 comments on commit b99c0e9

Please sign in to comment.