[VL] Enable array_size Spark function #5539

Status: Closed · 6 commits

CHExpressionUtil.scala
@@ -175,6 +175,7 @@ object CHExpressionUtil {
     ARRAY_EXCEPT -> DefaultValidator(),
     ARRAY_REPEAT -> DefaultValidator(),
     ARRAY_REMOVE -> DefaultValidator(),
+    ARRAY_SIZE -> DefaultValidator(),
     DATE_FROM_UNIX_DATE -> DefaultValidator(),
     UNIX_DATE -> DefaultValidator(),
     MONOTONICALLY_INCREASING_ID -> DefaultValidator(),
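
Note: although this PR targets the Velox backend ([VL]), the ClickHouse backend's expression table also gets an entry. Pairing ARRAY_SIZE with DefaultValidator() appears to mark the function as not offloadable to ClickHouse, so array_size keeps falling back to vanilla Spark there. A minimal, self-contained sketch of that lookup pattern (illustrative only, not Gluten's actual API; every name besides "array_size" is invented here):

    // Hypothetical sketch of a name-keyed validator table: an entry with a
    // default validator blocks offload of that function on this backend.
    sealed trait Validator
    case class DefaultValidator() extends Validator

    val chBlocked: Map[String, Validator] = Map(
      "array_size" -> DefaultValidator()
    )

    // Offload a scalar function to ClickHouse only when no blocking entry exists.
    def canOffloadToCH(functionName: String): Boolean =
      !chBlocked.contains(functionName)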

ScalarFunctionsValidateSuite.scala
@@ -16,6 +16,8 @@
  */
 package org.apache.gluten.execution

+import org.apache.gluten.sql.shims.SparkShimLoader
+
 import org.apache.spark.sql.types._

 import java.sql.Timestamp
@@ -826,4 +828,21 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
     }
   }

+  test("test array_size") {
+    if (!SparkShimLoader.getSparkVersion.startsWith("3.2")) {
+      withTempPath {
+        path =>
+          Seq[Seq[Integer]](Seq(1, null, 5, 4), Seq(5, -1, 8, 9, -7, 2), Seq.empty, null)
+            .toDF("value")
+            .write
+            .parquet(path.getCanonicalPath)
+
+          spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl")
+
+          runQueryAndCompare("select array_size(value) as res from array_tbl;") {
+            checkGlutenOperatorMatch[ProjectExecTransformer]
+          }
+      }
+    }
+  }
 }
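
Context for the version guard: array_size was added in Spark 3.3, so there is no Catalyst ArraySize expression to offload on Spark 3.2. The semantics the comparison relies on: null elements count toward the size, and a null array yields null, which runQueryAndCompare checks against the vanilla Spark baseline. A spark-shell sketch of the expected results for data like the test's (assumes a session named spark; one row dropped for brevity):

    import spark.implicits._

    Seq[Seq[Integer]](Seq(1, null, 5, 4), Seq.empty, null)
      .toDF("value")
      .selectExpr("array_size(value) as res")
      .show()
    // +----+
    // | res|
    // +----+
    // |   4|   <- the null element is counted
    // |   0|
    // |null|   <- null input yields null
    // +----+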

ExpressionNames.scala
@@ -246,6 +246,7 @@ object ExpressionNames {
   final val ARRAY_EXCEPT = "array_except"
   final val ARRAY_REPEAT = "array_repeat"
   final val ARRAY_REMOVE = "array_remove"
+  final val ARRAY_SIZE = "array_size"
   final val FILTER = "filter"
   final val FORALL = "forall"
   final val EXISTS = "exists"

Spark33Shims.scala
@@ -18,7 +18,7 @@ package org.apache.gluten.sql.shims.spark33

 import org.apache.gluten.execution.datasource.GlutenParquetWriterInjects
 import org.apache.gluten.expression.{ExpressionNames, Sig}
-import org.apache.gluten.expression.ExpressionNames.{CEIL, FLOOR, KNOWN_NULLABLE, TIMESTAMP_ADD}
+import org.apache.gluten.expression.ExpressionNames.{ARRAY_SIZE, CEIL, FLOOR, KNOWN_NULLABLE, TIMESTAMP_ADD}
 import org.apache.gluten.sql.shims.{ShimDescriptor, SparkShims}

 import org.apache.spark._
@@ -69,7 +69,8 @@ class Spark33Shims extends SparkShims {
       Sig[Empty2Null](ExpressionNames.EMPTY2NULL),
       Sig[TimestampAdd](TIMESTAMP_ADD),
       Sig[RoundFloor](FLOOR),
-      Sig[RoundCeil](CEIL)
+      Sig[RoundCeil](CEIL),
+      Sig[ArraySize](ARRAY_SIZE)
     )
   }
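
Sig[ArraySize](ARRAY_SIZE) ties Spark's ArraySize expression class to the "array_size" name the native side resolves, and the mapping is registered per shim because only Spark 3.3+ ships the expression. A rough, self-contained sketch of the registration pattern, modeled on the Sig usage visible in this diff rather than Gluten's actual definition:

    import scala.reflect.ClassTag

    // Stand-in for Spark's Catalyst ArraySize expression (3.3+ only).
    final class ArraySizeStandIn

    // Pairs an expression class with the function name used on the native side.
    final case class Sig(expressionClass: Class[_], functionName: String)
    object Sig {
      def apply[T](name: String)(implicit tag: ClassTag[T]): Sig =
        Sig(tag.runtimeClass, name)
    }

    val mapping = Sig[ArraySizeStandIn]("array_size")
    // mapping.expressionClass -> class ArraySizeStandIn
    // mapping.functionName    -> "array_size"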

Spark34Shims.scala
@@ -17,7 +17,7 @@
 package org.apache.gluten.sql.shims.spark34

 import org.apache.gluten.expression.{ExpressionNames, Sig}
-import org.apache.gluten.expression.ExpressionNames.KNOWN_NULLABLE
+import org.apache.gluten.expression.ExpressionNames.{ARRAY_SIZE, KNOWN_NULLABLE}
 import org.apache.gluten.sql.shims.{ShimDescriptor, SparkShims}

 import org.apache.spark._
@@ -70,7 +70,8 @@ class Spark34Shims extends SparkShims {
       Sig[Sec](ExpressionNames.SEC),
       Sig[Csc](ExpressionNames.CSC),
       Sig[KnownNullable](KNOWN_NULLABLE),
-      Sig[Empty2Null](ExpressionNames.EMPTY2NULL)
+      Sig[Empty2Null](ExpressionNames.EMPTY2NULL),
+      Sig[ArraySize](ARRAY_SIZE)
     )
   }

Spark35Shims.scala
@@ -17,6 +17,7 @@
 package org.apache.gluten.sql.shims.spark35

 import org.apache.gluten.expression.{ExpressionNames, Sig}
+import org.apache.gluten.expression.ExpressionNames.ARRAY_SIZE
 import org.apache.gluten.sql.shims.{ShimDescriptor, SparkShims}

 import org.apache.spark._
@@ -70,7 +71,9 @@ class Spark35Shims extends SparkShims {
       Sig[SplitPart](ExpressionNames.SPLIT_PART),
       Sig[Sec](ExpressionNames.SEC),
       Sig[Csc](ExpressionNames.CSC),
-      Sig[Empty2Null](ExpressionNames.EMPTY2NULL))
+      Sig[Empty2Null](ExpressionNames.EMPTY2NULL),
+      Sig[ArraySize](ARRAY_SIZE)
+    )
   }

   override def aggregateExpressionMappings: Seq[Sig] = {
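
With the 3.3/3.4/3.5 mappings in place, a quick manual smoke check (hypothetical spark-shell session with Gluten's Velox backend enabled) is to confirm the transformer shows up in the physical plan, mirroring what checkGlutenOperatorMatch asserts in the new test:

    // The plan should contain ProjectExecTransformer rather than a vanilla
    // Project when the offload succeeds.
    spark.sql("select array_size(array(1, 2, 3)) as res").explain()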