Skip to content

Commit

Permalink
[SPARK-29084][SQL][TESTS] Check method bytecode size in BenchmarkQuer…
Browse files Browse the repository at this point in the history
…yTest

### What changes were proposed in this pull request?

This pr proposes to check method bytecode size in `BenchmarkQueryTest`. This metric is critical for performance numbers.

### Why are the changes needed?

For performance checks

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

N/A

Closes #25788 from maropu/CheckMethodSize.

Authored-by: Takeshi Yamamuro <yamamuro@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
maropu authored and dongjoon-hyun committed Sep 22, 2019
1 parent 51d3509 commit 7a2ea58
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql

import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator}
import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
import org.apache.spark.sql.test.SharedSparkSession
Expand Down Expand Up @@ -48,7 +48,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
RuleExecutor.resetMetrics()
}

protected def checkGeneratedCode(plan: SparkPlan): Unit = {
protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
plan foreach {
case s: WholeStageCodegenExec =>
Expand All @@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
}
codegenSubtrees.toSeq.foreach { subtree =>
val code = subtree.doCodeGen()._2
try {
val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try {
// Just check the generated code can be properly compiled
CodeGenerator.compile(code)
} catch {
Expand All @@ -72,6 +72,11 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
""".stripMargin
throw new Exception(msg, e)
}

assert(!checkMethodCodeSize ||
maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT,
s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " +
s"and JIT optimization might not work:\n${subtree.treeString}")
}
}
}
Expand Up @@ -82,13 +82,19 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema {
"q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59",
"q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max")

// List up the known queries having too large code in a generated function.
// A JIRA file for `modified-q3` is as follows;
// [SPARK-29128] Split predicate code in OR expressions
val blackListForMethodCodeSizeCheck = Set("modified-q3")

modifiedTPCDSQueries.foreach { name =>
val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql",
classLoader = Thread.currentThread().getContextClassLoader)
test(s"modified-$name") {
val testName = s"modified-$name"
test(testName) {
// check the plans can be properly generated
val plan = sql(queryString).queryExecution.executedPlan
checkGeneratedCode(plan)
checkGeneratedCode(plan, !blackListForMethodCodeSizeCheck.contains(testName))
}
}
}
Expand Up @@ -32,8 +32,9 @@ import org.apache.spark.sql.execution.window.WindowExec

class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite {

override protected def checkGeneratedCode(plan: SparkPlan): Unit = {
super.checkGeneratedCode(plan)
override protected def checkGeneratedCode(
plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
super.checkGeneratedCode(plan, checkMethodCodeSize)
checkLogicalPlanTag(plan)
}

Expand Down

0 comments on commit 7a2ea58

Please sign in to comment.