Skip to content

Commit

Permalink
[SPARK-22693][SQL] CreateNamedStruct and InSet should not use global …
Browse files Browse the repository at this point in the history
…variables

## What changes were proposed in this pull request?

`CreateNamedStruct` and `InSet` each use a global variable (mutable state) that is not needed. This can generate unnecessary entries in the constant pool.

This PR removes the unnecessary mutable state and replaces it with local variables.

## How was this patch tested?

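Added unit tests that check that generating code for each expression leaves the `CodegenContext` with no mutable states.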

Author: Marco Gaido <marcogaido91@gmail.com>
Author: Marco Gaido <mgaido@hortonworks.com>

Closes #19896 from mgaido91/SPARK-22693.
  • Loading branch information
mgaido91 authored and gatorsmile committed Dec 6, 2017
1 parent 9948b86 commit f110a7f
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -356,22 +356,25 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val rowClass = classOf[GenericInternalRow].getName
val values = ctx.freshName("values")
ctx.addMutableState("Object[]", values, s"$values = null;")
val valCodes = valExprs.zipWithIndex.map { case (e, i) =>
val eval = e.genCode(ctx)
s"""
|${eval.code}
|if (${eval.isNull}) {
| $values[$i] = null;
|} else {
| $values[$i] = ${eval.value};
|}
""".stripMargin
}
val valuesCode = ctx.splitExpressionsWithCurrentInputs(
valExprs.zipWithIndex.map { case (e, i) =>
val eval = e.genCode(ctx)
s"""
${eval.code}
if (${eval.isNull}) {
$values[$i] = null;
} else {
$values[$i] = ${eval.value};
}"""
})
expressions = valCodes,
funcName = "createNamedStruct",
extraArguments = "Object[]" -> values :: Nil)

ev.copy(code =
s"""
|$values = new Object[${valExprs.size}];
|Object[] $values = new Object[${valExprs.size}];
|$valuesCode
|final InternalRow ${ev.value} = new $rowClass($values);
|$values = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,17 +344,17 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
} else {
""
}
ctx.addMutableState(setName, setTerm,
s"$setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();")
ev.copy(code = s"""
${childGen.code}
boolean ${ev.isNull} = ${childGen.isNull};
boolean ${ev.value} = false;
if (!${ev.isNull}) {
${ev.value} = $setTerm.contains(${childGen.value});
$setNull
}
""")
ev.copy(code =
s"""
|${childGen.code}
|${ctx.JAVA_BOOLEAN} ${ev.isNull} = ${childGen.isNull};
|${ctx.JAVA_BOOLEAN} ${ev.value} = false;
|if (!${ev.isNull}) {
| $setName $setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();
| ${ev.value} = $setTerm.contains(${childGen.value});
| $setNull
|}
""".stripMargin)
}

override def sql: String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

Expand Down Expand Up @@ -299,4 +300,10 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
new StringToMap(Literal("a=1_b=2_c=3"), Literal("_"), NonFoldableLiteral("="))
.checkInputDataTypes().isFailure)
}

test("SPARK-22693: CreateNamedStruct should not use global variables") {
  // Generate code against a brand-new context, then verify that the
  // expression did not register anything in the context's mutable states.
  val codegenCtx = new CodegenContext
  val namedStruct = CreateNamedStruct(Seq("a", "x", "b", 2.0))
  namedStruct.genCode(codegenCtx)
  assert(codegenCtx.mutableStates.isEmpty)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.RandomDataGenerator
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types._

Expand Down Expand Up @@ -429,4 +430,10 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
val infinity = Literal(Double.PositiveInfinity)
checkEvaluation(EqualTo(infinity, infinity), true)
}

test("SPARK-22693: InSet should not use global variables") {
  // Generate code against a brand-new context, then verify that the
  // expression did not register anything in the context's mutable states.
  val codegenCtx = new CodegenContext
  val inSetExpr = InSet(Literal(1), Set(1, 2, 3, 4))
  inSetExpr.genCode(codegenCtx)
  assert(codegenCtx.mutableStates.isEmpty)
}
}

0 comments on commit f110a7f

Please sign in to comment.