From e4d3f637e4fa07e3f357477fa0f0182e04a35981 Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Fri, 7 Nov 2025 13:35:03 +0100 Subject: [PATCH 1/8] Translate get array item catalyst expression to connector expression --- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 8 ++++++++ .../spark/sql/catalyst/util/V2ExpressionBuilder.scala | 3 +++ 2 files changed, 11 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index e3b875469169..6f95f2a598b0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -127,6 +127,7 @@ yield visitBinaryArithmetic( case "LTRIM" -> visitTrim("LEADING", expressionsToStringArray(e.children())); case "RTRIM" -> visitTrim("TRAILING", expressionsToStringArray(e.children())); case "OVERLAY" -> visitOverlay(expressionsToStringArray(e.children())); + case "GET_ARRAY_ITEM" -> visitGetArrayItem(expressionsToStringArray(e.children())); // TODO supports other expressions default -> visitUnexpectedExpr(expr); }; @@ -348,6 +349,13 @@ protected String visitTrim(String direction, String[] inputs) { } } + protected String visitGetArrayItem(String[] inputs) { + throw new SparkUnsupportedOperationException( + "_LEGACY_ERROR_TEMP_3177", + Map.of("class", this.getClass().getSimpleName(), "funcName", "GET_ARRAY_ITEM") + ); + } + protected String visitExtract(Extract extract) { return visitExtract(extract.field(), build(extract.source())); } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 4e391208d984..0a552cdc4b2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -326,6 +326,9 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) extends L case _: Sha2 => generateExpressionWithName("SHA2", expr, isPredicate) case _: StringLPad => generateExpressionWithName("LPAD", expr, isPredicate) case _: StringRPad => generateExpressionWithName("RPAD", expr, isPredicate) + case GetArrayItem(_, _, failOnError) if failOnError => + // Pushdown only if ANSI is enabled (fail on error) to be compatible with remote systems. + generateExpressionWithName("GET_ARRAY_ITEM", expr, isPredicate) // TODO supports other expressions case ApplyFunctionExpression(function, children) => val childrenExpressions = children.flatMap(generateExpression(_)) From c383370ea68eb9c515b06f451f099c4833468d7f Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Fri, 7 Nov 2025 15:39:28 +0100 Subject: [PATCH 2/8] Implement ToStringSQLBuilder --- .../spark/sql/internal/connector/ToStringSQLBuilder.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala index 118c1af97745..0b9f3fb0ce69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala @@ -35,4 +35,8 @@ class ToStringSQLBuilder extends V2ExpressionSQLBuilder with Serializable { val distinct = if (isDistinct) "DISTINCT " else "" s"""$funcName($distinct${inputs.mkString(", ")})""" } + + override protected def visitGetArrayItem(inputs: Array[String]): String = { + s"${inputs(0)}[${inputs.tail.mkString(", ")}]" + } } From 09b2d99ac22e028d969574e98fd50dcd7ba81e0f Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Tue, 11 Nov 2025 00:18:13 +0100 Subject: [PATCH 3/8] Use separate expression for get array item translation --- .../connector/expressions/GetArrayItem.java | 55 +++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 3 +- .../catalyst/util/V2ExpressionBuilder.scala | 12 ++-- .../connector/ToStringSQLBuilder.scala | 5 +- 4 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java new file mode 100644 index 000000000000..f18708e68635 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.internal.connector.ExpressionWithToString; + +/** + * Get array item expression. + * + * @since 4.1.0 + */ + +@Evolving +public class GetArrayItem extends ExpressionWithToString { + + private final Expression childArray; + private final Expression ordinal; + private final boolean failOnError; + + /** + * Creates GetArrayItem expression. + * @param childArray Array that is source to get element from. Child of this expression. + * @param ordinal Ordinal of element. Zero-based indexing. + * @param failOnError Whether expression should throw exception for index out of bound or to + * return null. + */ + public GetArrayItem(Expression childArray, Expression ordinal, boolean failOnError) { + this.childArray = childArray; + this.ordinal = ordinal; + this.failOnError = failOnError; + } + + public Expression getChildArray() { return this.childArray; } + public Expression getOrdinal() { return this.ordinal; } + public boolean getFailOnError() { return this.failOnError; } + + @Override + public Expression[] children() { return new Expression[]{ getChildArray() }; } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 6f95f2a598b0..7adaa6555dd7 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -29,6 +29,7 @@ import org.apache.spark.sql.connector.expressions.Extract; import org.apache.spark.sql.connector.expressions.NamedReference; import org.apache.spark.sql.connector.expressions.GeneralScalarExpression; +import org.apache.spark.sql.connector.expressions.GetArrayItem; import org.apache.spark.sql.connector.expressions.Literal; import org.apache.spark.sql.connector.expressions.NullOrdering; import org.apache.spark.sql.connector.expressions.SortDirection; @@ -349,7 +350,7 @@ protected String visitTrim(String direction, String[] inputs) { } } - protected String visitGetArrayItem(String[] inputs) { + protected String visitGetArrayItem(GetArrayItem getArrayItem) { throw new SparkUnsupportedOperationException( "_LEGACY_ERROR_TEMP_3177", Map.of("class", this.getClass().getSimpleName(), "funcName", "GET_ARRAY_ITEM") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 0a552cdc4b2a..72b466f5a0f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} import org.apache.spark.sql.catalyst.optimizer.ConstantFolding import org.apache.spark.sql.connector.catalog.functions.ScalarFunction -import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, Extract => V2Extract, FieldReference, GeneralScalarExpression, LiteralValue, NullOrdering, SortDirection, SortValue, UserDefinedScalarFunc} +import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, Extract => V2Extract, FieldReference, GeneralScalarExpression, GetArrayItem => V2GetArrayItem, LiteralValue, NullOrdering, SortDirection, SortValue, UserDefinedScalarFunc} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum, UserDefinedAggregateFunc} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} import org.apache.spark.sql.internal.SQLConf @@ -326,9 +326,13 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) extends L case _: Sha2 => generateExpressionWithName("SHA2", expr, isPredicate) case _: StringLPad => generateExpressionWithName("LPAD", expr, isPredicate) case _: StringRPad => generateExpressionWithName("RPAD", expr, isPredicate) - case GetArrayItem(_, _, failOnError) if failOnError => - // Pushdown only if ANSI is enabled (fail on error) to be compatible with remote systems. - generateExpressionWithName("GET_ARRAY_ITEM", expr, isPredicate) + case GetArrayItem(child, ordinal, failOnError) => + (generateExpression(child), generateExpression(ordinal)) match { + case (Some(v2ArrayChild), Some(v2Ordinal)) => + Some(new V2GetArrayItem(v2ArrayChild, v2Ordinal, failOnError)) + case _ => + None + } // TODO supports other expressions case ApplyFunctionExpression(function, children) => val childrenExpressions = children.flatMap(generateExpression(_)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala index 0b9f3fb0ce69..7adac2a5e8cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.internal.connector +import org.apache.spark.sql.connector.expressions.GetArrayItem import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder /** @@ -36,7 +37,7 @@ class ToStringSQLBuilder extends V2ExpressionSQLBuilder with Serializable { s"""$funcName($distinct${inputs.mkString(", ")})""" } - override protected def visitGetArrayItem(inputs: Array[String]): String = { - s"${inputs(0)}[${inputs.tail.mkString(", ")}]" + override protected def visitGetArrayItem(getArrayItem: GetArrayItem): String = { + s"${getArrayItem.getChildArray.toString}[${getArrayItem.getOrdinal.toString}]" } } From 3b0318f09b7a5d5add9651adfca004c717adde2e Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Tue, 11 Nov 2025 00:35:21 +0100 Subject: [PATCH 4/8] Introduce new error code --- common/utils/src/main/resources/error/error-conditions.json | 6 ++++++ .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 57ed891087f2..4a27960b50f0 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -1693,6 +1693,12 @@ ], "sqlState" : "42846" }, + "EXPRESSION_TRANSLATION_TO_V2_IS_NOT_SUPPORTED" : { + "message" : [ + "Expression cannot be translated to v2 expression." + ], + "sqlState" : "0A000" + }, "EXPRESSION_TYPE_IS_NOT_ORDERABLE" : { "message" : [ "Column expression cannot be sorted because its type is not orderable." diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 7adaa6555dd7..4e7f4df621f0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -352,8 +352,8 @@ protected String visitTrim(String direction, String[] inputs) { protected String visitGetArrayItem(GetArrayItem getArrayItem) { throw new SparkUnsupportedOperationException( - "_LEGACY_ERROR_TEMP_3177", - Map.of("class", this.getClass().getSimpleName(), "funcName", "GET_ARRAY_ITEM") + "EXPRESSION_TRANSLATION_TO_V2_IS_NOT_SUPPORTED", + Map.of("expr", getArrayItem.toString()) ); } From 1c27c59629fa363d56e3e7d1583a60cded466628 Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Tue, 11 Nov 2025 00:42:31 +0100 Subject: [PATCH 5/8] Fix building of get array item --- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 4e7f4df621f0..50921f3de0b4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -85,6 +85,8 @@ public String build(Expression expr) { } else if (expr instanceof SortOrder sortOrder) { return visitSortOrder( build(sortOrder.expression()), sortOrder.direction(), sortOrder.nullOrdering()); + } else if (expr instanceof GetArrayItem getArrayItem) { + return visitGetArrayItem(getArrayItem); } else if (expr instanceof GeneralScalarExpression e) { String name = e.name(); return switch (name) { @@ -128,7 +130,6 @@ yield visitBinaryArithmetic( case "LTRIM" -> visitTrim("LEADING", expressionsToStringArray(e.children())); case "RTRIM" -> visitTrim("TRAILING", expressionsToStringArray(e.children())); case "OVERLAY" -> visitOverlay(expressionsToStringArray(e.children())); - case "GET_ARRAY_ITEM" -> visitGetArrayItem(expressionsToStringArray(e.children())); // TODO supports other expressions default -> visitUnexpectedExpr(expr); }; From 81ddd910c6919dd550a9ec32f8c55384183ee9ce Mon Sep 17 00:00:00 2001 From: Uros Stankovic Date: Tue, 11 Nov 2025 12:25:16 +0100 Subject: [PATCH 6/8] Rename getters --- .../spark/sql/connector/expressions/GetArrayItem.java | 8 ++++---- .../spark/sql/internal/connector/ToStringSQLBuilder.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java index f18708e68635..8e7ea73dca22 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java @@ -46,10 +46,10 @@ public GetArrayItem(Expression childArray, Expression ordinal, boolean failOnErr this.failOnError = failOnError; } - public Expression getChildArray() { return this.childArray; } - public Expression getOrdinal() { return this.ordinal; } - public boolean getFailOnError() { return this.failOnError; } + public Expression childArray() { return this.childArray; } + public Expression ordinal() { return this.ordinal; } + public boolean failOnError() { return this.failOnError; } @Override - public Expression[] children() { return new Expression[]{ getChildArray() }; } + public Expression[] children() { return new Expression[]{ childArray() }; } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala index 7adac2a5e8cb..5c54f2897645 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/ToStringSQLBuilder.scala @@ -38,6 +38,6 @@ class ToStringSQLBuilder extends V2ExpressionSQLBuilder with Serializable { } override protected def visitGetArrayItem(getArrayItem: GetArrayItem): String = { - s"${getArrayItem.getChildArray.toString}[${getArrayItem.getOrdinal.toString}]" + s"${getArrayItem.childArray.toString}[${getArrayItem.ordinal.toString}]" } } From 787315cc409e4e1b6ace0fbe2b6fcc5e3fdeb575 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 12 Nov 2025 21:43:36 +0800 Subject: [PATCH 7/8] Update sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java --- .../apache/spark/sql/connector/expressions/GetArrayItem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java index 8e7ea73dca22..683056fa8fb5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java @@ -51,5 +51,5 @@ public GetArrayItem(Expression childArray, Expression ordinal, boolean failOnErr public boolean failOnError() { return this.failOnError; } @Override - public Expression[] children() { return new Expression[]{ childArray() }; } + public Expression[] children() { return new Expression[]{ childArray, ordinal }; } } From 9f75dd8330b4a7a497cd84c2d2988dc5e6c147e7 Mon Sep 17 00:00:00 2001 From: Uros Stankovic <155642965+urosstan-db@users.noreply.github.com> Date: Wed, 12 Nov 2025 14:54:33 +0100 Subject: [PATCH 8/8] nit: add whitespace --- .../apache/spark/sql/connector/expressions/GetArrayItem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java index 683056fa8fb5..5d7e05f598ad 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GetArrayItem.java @@ -51,5 +51,5 @@ public GetArrayItem(Expression childArray, Expression ordinal, boolean failOnErr public boolean failOnError() { return this.failOnError; } @Override - public Expression[] children() { return new Expression[]{ childArray, ordinal }; } + public Expression[] children() { return new Expression[]{ childArray, ordinal }; } }