From 124172a707b3053cfc407adcf1cb495a5f16f182 Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Thu, 13 Oct 2016 16:16:29 +0800 Subject: [PATCH] [SPARK-16845][SQL] `GeneratedClass$SpecificOrdering` grows beyond 64 KB --- .../codegen/GenerateOrdering.scala | 44 +++++++++++++++++-- .../catalyst/expressions/OrderingSuite.scala | 10 +++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1cef95654a17b..be996b011ce03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -72,7 +72,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR * Generates the code for ordering based on the given order. */ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { - val comparisons = ordering.map { order => + def comparisons(orderingGroup: Seq[SortOrder]) = orderingGroup.map { order => val eval = order.child.genCode(ctx) val asc = order.isAscending val isNullA = ctx.freshName("isNullA") @@ -118,7 +118,45 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } """ }.mkString("\n") - comparisons + + /* + * 40 is roughly 7000 bytes / 170 bytes (around 170 bytes of bytecode per ordering comparison). + * HotSpot will not JIT-compile a method whose bytecode exceeds 8000 bytes, + * so each generated method should be kept below 8000 bytes. + */ + val numberOfComparisonsThreshold = 40 + + if (ordering.size <= numberOfComparisonsThreshold) { + s""" + | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. 
+ | ${comparisons(ordering)} + """.stripMargin + } else { + val groupedOrderingItr = ordering.grouped(numberOfComparisonsThreshold) + var groupedOrderingLength = 0 + groupedOrderingItr.zipWithIndex.foreach { case (orderingGroup, i) => + groupedOrderingLength += 1 + val funcName = s"compare_$i" + val funcCode = + s""" + |private int $funcName(InternalRow a, InternalRow b) { + | InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + | ${comparisons(orderingGroup)} + | return 0; + |} + """.stripMargin + ctx.addNewFunction(funcName, funcCode) + } + + (0 to groupedOrderingLength - 1).map { i => + s""" + |result = compare_$i(a, b); + |if (result != 0) { + | return result; + |} + """.stripMargin + }.mkString("") + } } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { @@ -142,7 +180,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR ${ctx.declareAddedFunctions()} public int compare(InternalRow a, InternalRow b) { - InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. 
+ int result; $comparisons return 0; } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 8cc2ab46c0c85..d2ae003c0f4fe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // This is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // This is FAILING prior to SPARK-16845, but it should be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(450)(sortOrder)) + } }