Skip to content

Commit

Permalink
[SPARK-22498][SQL] Fix 64KB JVM bytecode limit problem with concat
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This PR changes `concat` code generation to place generated code for expression for arguments into separated methods if these size could be large.
This PR resolved the case of `concat` with a lot of argument

## How was this patch tested?

Added new test cases into `StringExpressionsSuite`

Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>

Closes #19728 from kiszk/SPARK-22498.
  • Loading branch information
kiszk authored and cloud-fan committed Nov 18, 2017
1 parent bf0c0ae commit d54bfec
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,27 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evals = children.map(_.genCode(ctx))
val inputs = evals.map { eval =>
s"${eval.isNull} ? null : ${eval.value}"
}.mkString(", ")
ev.copy(evals.map(_.code).mkString("\n") + s"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = UTF8String.concat($inputs);
if (${ev.value} == null) {
${ev.isNull} = true;
}
val args = ctx.freshName("args")

val inputs = evals.zipWithIndex.map { case (eval, index) =>
s"""
${eval.code}
if (!${eval.isNull}) {
$args[$index] = ${eval.value};
}
"""
}
val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
ctx.splitExpressions(inputs, "valueConcat",
("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil)
} else {
inputs.mkString("\n")
}
ev.copy(s"""
UTF8String[] $args = new UTF8String[${evals.length}];
$codes
UTF8String ${ev.value} = UTF8String.concat($args);
boolean ${ev.isNull} = ${ev.value} == null;
""")
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// scalastyle:on
}

test("SPARK-22498: Concat should not generate codes beyond 64KB") {
val N = 5000
val strs = (1 to N).map(x => s"s$x")
checkEvaluation(Concat(strs.map(Literal.create(_, StringType))), strs.mkString, EmptyRow)
}

test("concat_ws") {
def testConcatWs(expected: String, sep: String, inputs: Any*): Unit = {
val inputExprs = inputs.map {
Expand Down

0 comments on commit d54bfec

Please sign in to comment.