apache · beliefer · Jul 8, 2023 · Jul 8, 2023 · Jul 11, 2023 · vinodkc
diff --git a/...c/main/scala/org/apache/spark/sql/execution/window/WindowGroupLimitEvaluatorFactory.scala b/...c/main/scala/org/apache/spark/sql/execution/window/WindowGroupLimitEvaluatorFactory.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.window
+
+import org.apache.spark.{PartitionEvaluator, PartitionEvaluatorFactory}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, DenseRank, Expression, Rank, RowNumber, SortOrder}
+import org.apache.spark.sql.execution.metric.SQLMetric
+
+/**
+ * Factory for the [[PartitionEvaluator]] that `WindowGroupLimitExec` runs over each partition
+ * of its child's rows.
+ *
+ * The limiting iterator is selected from the type of `rankLikeFunction` ([[RowNumber]],
+ * [[Rank]] or [[DenseRank]]). When `partitionSpec` is non-empty, the selected iterator is
+ * wrapped in a `GroupedLimitIterator` together with `childOutput` and `partitionSpec`;
+ * otherwise it is applied to the partition's rows directly.
+ *
+ * @param partitionSpec expressions forwarded to `GroupedLimitIterator`; empty means the limit
+ *                      iterator is applied to the whole input iterator
+ * @param orderSpec sort orders forwarded to the rank and dense-rank limit iterators
+ * @param rankLikeFunction function whose type selects the limit iterator
+ * @param limit limit forwarded to the selected limit iterator
+ * @param childOutput attributes forwarded to the limit iterators
+ * @param numOutputRows metric forwarded to the selected limit iterator
+ */
+class WindowGroupLimitEvaluatorFactory(
+    partitionSpec: Seq[Expression],
+    orderSpec: Seq[SortOrder],
+    rankLikeFunction: Expression,
+    limit: Int,
+    childOutput: Seq[Attribute],
+    numOutputRows: SQLMetric)
+  extends PartitionEvaluatorFactory[InternalRow, InternalRow] {
+
+  /**
+   * Creates an evaluator for the configured rank-like function.
+   *
+   * @throws IllegalStateException if `rankLikeFunction` is not one of the supported functions
+   */
+  override def createEvaluator(): PartitionEvaluator[InternalRow, InternalRow] = {
+    val limitFunc: Iterator[InternalRow] => Iterator[InternalRow] = rankLikeFunction match {
+      case _: RowNumber =>
+        (iter: Iterator[InternalRow]) => SimpleLimitIterator(iter, limit, numOutputRows)
+      case _: Rank =>
+        (iter: Iterator[InternalRow]) =>
+          RankLimitIterator(childOutput, iter, orderSpec, limit, numOutputRows)
+      case _: DenseRank =>
+        (iter: Iterator[InternalRow]) =>
+          DenseRankLimitIterator(childOutput, iter, orderSpec, limit, numOutputRows)
+      case other =>
+        // Without this case an unexpected function would surface as a bare scala.MatchError.
+        throw new IllegalStateException(
+          s"Unsupported rank-like function for window group limit: $other")
+    }
+
+    if (partitionSpec.isEmpty) {
+      new WindowGroupLimitPartitionEvaluator(limitFunc)
+    } else {
+      new WindowGroupLimitPartitionEvaluator(
+        input => new GroupedLimitIterator(input, childOutput, partitionSpec, limitFunc))
+    }
+  }
+
+  /**
+   * Evaluator that applies `f` to the single input iterator it is given.
+   *
+   * @param f transformation applied to the input rows
+   */
+  class WindowGroupLimitPartitionEvaluator(f: Iterator[InternalRow] => Iterator[InternalRow])
+    extends PartitionEvaluator[InternalRow, InternalRow] {
+
+    override def eval(
+        partitionIndex: Int,
+        inputs: Iterator[InternalRow]*): Iterator[InternalRow] = {
+      // Only the first input is consumed, so reject any other arity instead of ignoring it.
+      assert(inputs.length == 1, s"Expected exactly one input iterator, got ${inputs.length}")
+      f(inputs.head)
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowGroupLimitExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowGroupLimitExec.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.window
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, DenseRank, Expression, Rank, RowNumber, SortOrder, UnsafeProjection, UnsafeRow}
+import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder, UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering
 import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
 import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
@@ -73,26 +73,23 @@ case class WindowGroupLimitExec(
 
   protected override def doExecute(): RDD[InternalRow] = {
     val numOutputRows = longMetric("numOutputRows")
-    rankLikeFunction match {
-      case _: RowNumber if partitionSpec.isEmpty =>
-        child.execute().mapPartitionsInternal(SimpleLimitIterator(_, limit, numOutputRows))
-      case _: RowNumber =>
-        child.execute().mapPartitionsInternal(new GroupedLimitIterator(_, output, partitionSpec,
-          (input: Iterator[InternalRow]) => SimpleLimitIterator(input, limit, numOutputRows)))
-      case _: Rank if partitionSpec.isEmpty =>
-        child.execute().mapPartitionsInternal(
-          RankLimitIterator(output, _, orderSpec, limit, numOutputRows))
-      case _: Rank =>
-        child.execute().mapPartitionsInternal(new GroupedLimitIterator(_, output, partitionSpec,
-          (input: Iterator[InternalRow]) =>
-            RankLimitIterator(output, input, orderSpec, limit, numOutputRows)))
-      case _: DenseRank if partitionSpec.isEmpty =>
-        child.execute().mapPartitionsInternal(
-          DenseRankLimitIterator(output, _, orderSpec, limit, numOutputRows))
-      case _: DenseRank =>
-        child.execute().mapPartitionsInternal(new GroupedLimitIterator(_, output, partitionSpec,
-          (input: Iterator[InternalRow]) =>
-            DenseRankLimitIterator(output, input, orderSpec, limit, numOutputRows)))
+
+    // Bundle everything the per-partition limit logic needs into one factory.
+    val evaluatorFactory = new WindowGroupLimitEvaluatorFactory(
+      partitionSpec,
+      orderSpec,
+      rankLikeFunction,
+      limit,
+      child.output,
+      numOutputRows)
+    val childRows = child.execute()
+    if (conf.usePartitionEvaluator) {
+      childRows.mapPartitionsWithEvaluator(evaluatorFactory)
+    } else {
+      // Create the evaluator by hand for each partition; the index passed is always 0.
+      childRows.mapPartitionsInternal { rows =>
+        evaluatorFactory.createEvaluator().eval(0, rows)
+      }
     }
   }