From 6ee910611e7b6311f4ec1c5afb0ce5274fcf6805 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 29 Jan 2018 00:35:06 +0000 Subject: [PATCH 1/5] initial commit --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 5 +++-- .../apache/spark/sql/execution/WholeStageCodegenExec.scala | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4dcbb702893da..72a6656fb9820 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1227,12 +1227,13 @@ class CodegenContext { /** * Register a comment and return the corresponding place holder */ - def registerComment(text: => String): String = { + def registerComment(text: => String, forceComment: Boolean = false): String = { // By default, disable comments in generated code because computing the comments themselves can // be extremely expensive in certain cases, such as deeply-nested expressions which operate over // inputs with wide schemas. For more details on the performance issues that motivated this // flat, see SPARK-15680. - if (SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) { + if (forceComment || + SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) { val name = freshName("c") val comment = if (text.contains("\n") || text.contains("\r")) { text.split("(\r\n)|\r|\n").mkString("/**\n * ", "\n * ", "\n */") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 0e525b1e22eb9..b6c3cfeb293b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -542,6 +542,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) s"""Codegend pipeline for stage (id=$codegenStageId) |${this.treeString.trim}""".stripMargin)} final class $className extends ${classOf[BufferedRowIterator].getName} { + ${ctx.registerComment(s"codegenStageId=$codegenStageId", true)} private Object[] references; private scala.collection.Iterator[] inputs; From 4a09eac999ba223d379b96ac8335a2fc50ff52c9 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 29 Jan 2018 17:48:53 +0000 Subject: [PATCH 2/5] address review comment --- .../org/apache/spark/sql/execution/WholeStageCodegenExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index b6c3cfeb293b4..ffcbc9223bd96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -541,8 +541,8 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) ${ctx.registerComment( s"""Codegend pipeline for stage (id=$codegenStageId) |${this.treeString.trim}""".stripMargin)} + ${ctx.registerComment(s"codegenStageId=$codegenStageId", true)} final class $className extends ${classOf[BufferedRowIterator].getName} { - ${ctx.registerComment(s"codegenStageId=$codegenStageId", true)} private Object[] references; private scala.collection.Iterator[] inputs; From f07dc2dffb625c44b5c33d97b56b719564c61d51 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 31 Jan 2018 15:06:48 +0000 Subject: [PATCH 3/5] address review comment --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 72a6656fb9820..1c2850e131fab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1226,13 +1226,15 @@ class CodegenContext { /** * Register a comment and return the corresponding place holder + * + * @param force whether to force registering the comments */ - def registerComment(text: => String, forceComment: Boolean = false): String = { + def registerComment(text: => String, force: Boolean = false): String = { // By default, disable comments in generated code because computing the comments themselves can // be extremely expensive in certain cases, such as deeply-nested expressions which operate over // inputs with wide schemas. For more details on the performance issues that motivated this // flat, see SPARK-15680. - if (forceComment || + if (force || SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) { val name = freshName("c") val comment = if (text.contains("\n") || text.contains("\r")) { From cb7a16b1e1abdb7dcb45f2a18085dda0cae8c12f Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Mon, 5 Feb 2018 19:09:38 +0000 Subject: [PATCH 4/5] address review comment --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 8 ++++++-- .../spark/sql/execution/WholeStageCodegenExec.scala | 5 +++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1c2850e131fab..1637897975185 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1227,16 +1227,20 @@ class CodegenContext { /** * Register a comment and return the corresponding place holder * + * @param placeholderId a string for a place holder * @param force whether to force registering the comments */ - def registerComment(text: => String, force: Boolean = false): String = { + def registerComment( + text: => String, + placeholderId: String = "", + force: Boolean = false): String = { // By default, disable comments in generated code because computing the comments themselves can // be extremely expensive in certain cases, such as deeply-nested expressions which operate over // inputs with wide schemas. For more details on the performance issues that motivated this // flat, see SPARK-15680. if (force || SparkEnv.get != null && SparkEnv.get.conf.getBoolean("spark.sql.codegen.comments", false)) { - val name = freshName("c") + val name = if (placeholderId != "") placeholderId else freshName("c") val comment = if (text.contains("\n") || text.contains("\r")) { text.split("(\r\n)|\r|\n").mkString("/**\n * ", "\n * ", "\n */") } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index ffcbc9223bd96..11c279716483f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -540,8 +540,9 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) ${ctx.registerComment( s"""Codegend pipeline for stage (id=$codegenStageId) - |${this.treeString.trim}""".stripMargin)} - ${ctx.registerComment(s"codegenStageId=$codegenStageId", true)} + |${this.treeString.trim}""".stripMargin, + "wsc_codegenPipeline")} + ${ctx.registerComment(s"codegenStageId=$codegenStageId", "wsc_codegenStageId", true)} final class $className extends ${classOf[BufferedRowIterator].getName} { private Object[] references; From 0ad1e72c5ec874d688e08b0cfa0a297867be5275 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Tue, 13 Feb 2018 17:10:49 +0000 Subject: [PATCH 5/5] address review comment --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1637897975185..7c8aeacb46c89 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1227,7 +1227,10 @@ class CodegenContext { /** * Register a comment and return the corresponding place holder * - * @param placeholderId a string for a place holder + * @param placeholderId an optionally specified identifier for the comment's placeholder. + * The caller should make sure this identifier is unique within the + * compilation unit. If this argument is not specified, a fresh identifier + * will be automatically created and used as the placeholder. * @param force whether to force registering the comments */ def registerComment(