diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
index 0b74a2667a273..d8f67fb7357e7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
@@ -132,14 +132,18 @@ trait BaseLimitExec extends LimitExec with CodegenSupport {
   }
 
   protected override def doProduce(ctx: CodegenContext): String = {
-    child.asInstanceOf[CodegenSupport].produce(ctx, this)
-  }
-
-  override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
     // The counter name is already obtained by the upstream operators via `limitNotReachedChecks`.
     // Here we have to inline it to not change its name. This is fine as we won't have many limit
     // operators in one query.
+    //
+    // Note: create counter variable here instead of `doConsume()` to avoid compilation error,
+    // because upstream operators might not call `doConsume()` here
+    // (e.g. `HashJoin.codegenInner()`).
     ctx.addMutableState(CodeGenerator.JAVA_INT, countTerm, forceInline = true, useFreshName = false)
+    child.asInstanceOf[CodegenSupport].produce(ctx, this)
+  }
+
+  override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
     s"""
        | if ($countTerm < $limit) {
        |   $countTerm += 1;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index f709d80345606..00cbd73533ab9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4097,6 +4097,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       checkAnswer(df2, Seq(Row(2, 1, 1), Row(4, 2, 2)))
     }
   }
+
+  test("SPARK-34796: Avoid code-gen compilation error for LIMIT query") {
+    withTable("left_table", "empty_right_table", "output_table") {
+      spark.range(5).toDF("k").write.saveAsTable("left_table")
+      spark.range(0).toDF("k").write.saveAsTable("empty_right_table")
+
+      withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
+        spark.sql("CREATE TABLE output_table (k INT) USING parquet")
+        spark.sql(
+          """
+            |INSERT INTO TABLE output_table
+            |SELECT t1.k FROM left_table t1
+            |JOIN empty_right_table t2
+            |ON t1.k = t2.k
+            |LIMIT 3
+          """.stripMargin)
+      }
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
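
For context, below is a minimal standalone sketch of the query shape the new test exercises, outside the test harness. It assumes a local SparkSession; the object name Spark34796Repro and the local[2] master are illustrative, not part of the patch. The query mirrors the added test: a join against an empty build side under a LIMIT, with adaptive execution disabled.

// Standalone reproduction sketch (names and session settings are assumptions).
import org.apache.spark.sql.SparkSession

object Spark34796Repro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("SPARK-34796-repro")
      // AQE is disabled so the join over the empty table keeps its whole-stage-codegen
      // plan, matching the configuration used by the new test.
      .config("spark.sql.adaptive.enabled", "false")
      .getOrCreate()

    spark.range(5).toDF("k").write.saveAsTable("left_table")
    spark.range(0).toDF("k").write.saveAsTable("empty_right_table")
    spark.sql("CREATE TABLE output_table (k INT) USING parquet")

    // Before the patch, this INSERT ... JOIN ... LIMIT could fail with a codegen
    // compilation error: the LIMIT counter was declared only in doConsume(), which
    // upstream operators (e.g. HashJoin.codegenInner()) may never reach when the
    // build side is empty, while limitNotReachedChecks still references the counter.
    spark.sql(
      """
        |INSERT INTO TABLE output_table
        |SELECT t1.k FROM left_table t1
        |JOIN empty_right_table t2
        |ON t1.k = t2.k
        |LIMIT 3
      """.stripMargin)

    spark.stop()
  }
}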