Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/contributor-guide/spark_expressions_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
### collection_funcs

- [ ] array_size
- [ ] cardinality
- [x] cardinality
- [x] concat
- [x] reverse
- [x] size
Expand Down
8 changes: 2 additions & 6 deletions spark/src/main/scala/org/apache/comet/serde/arrays.scala
Original file line number Diff line number Diff line change
Expand Up @@ -639,13 +639,9 @@ object CometArrayFilter extends CometExpressionSerde[ArrayFilter] {

object CometSize extends CometExpressionSerde[Size] {

override def getUnsupportedReasons(): Seq[String] = Seq(
"Only supports `ArrayType` input; `MapType` input is not supported")

override def getSupportLevel(expr: Size): SupportLevel = {
expr.child.dataType match {
case _: ArrayType => Compatible()
case _: MapType => Unsupported(Some("size does not support map inputs"))
case _: ArrayType | _: MapType => Compatible()
case other =>
// this should be unreachable because Spark only supports map and array inputs
Unsupported(Some(s"Unsupported child data type: $other"))
Expand All @@ -660,7 +656,7 @@ object CometSize extends CometExpressionSerde[Size] {
for {
isNotNullExprProto <- createIsNotNullExprProto(expr, inputs, binding)
sizeScalarExprProto <- scalarFunctionExprToProto("size", arrayExprProto)
emptyLiteralExprProto <- createLiteralExprProto(SQLConf.get.legacySizeOfNull)
emptyLiteralExprProto <- createLiteralExprProto(expr.legacySizeOfNull)
} yield {
val caseWhenExpr = ExprOuterClass.CaseWhen
.newBuilder()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- cardinality() is an alias for size() with legacySizeOfNull=false:
-- it always returns NULL for NULL input (never -1), and supports
-- both array and map inputs.
-- inputTypes: TypeCollection(ArrayType, MapType) -> test both

statement
CREATE TABLE test_cardinality(
arr array<int>,
nested_arr array<array<int>>,
struct_arr array<struct<a: int>>,
m map<string, int>
) USING parquet

statement
INSERT INTO test_cardinality VALUES
(array(1, 2, 3), array(array(1, 2), array(3)), array(named_struct('a', 1), named_struct('a', 2)), map('a', 1, 'b', 2)),
(array(10), array(array(10)), array(named_struct('a', 1)), map('x', 99)),
(array(), array(), array(), map()),
(NULL, NULL, NULL, NULL)

-- column reference: array input
query
SELECT cardinality(arr) FROM test_cardinality

-- column reference: map input
query
SELECT cardinality(m) FROM test_cardinality

-- both in same query
query
SELECT cardinality(arr), cardinality(m) FROM test_cardinality

-- cardinality returns NULL for NULL input (not -1 like size() in legacy mode)
query
SELECT cardinality(arr), cardinality(m) FROM test_cardinality WHERE arr IS NULL

-- nested array input
query
SELECT cardinality(nested_arr) FROM test_cardinality

-- array-of-structs input
query
SELECT cardinality(struct_arr) FROM test_cardinality
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ CREATE TABLE test_size(arr array<int>, m map<string, int>) USING parquet
statement
INSERT INTO test_size VALUES (array(1, 2, 3), map('a', 1, 'b', 2)), (array(), map()), (NULL, NULL)

query spark_answer_only
query
SELECT size(arr), size(m) FROM test_size

-- literal arguments
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,22 +126,24 @@ class CometMapExpressionSuite extends CometTestBase {
}
}

test("fallback for size with map input") {
test("fallback for size with map constructor input") {
withTempDir { dir =>
withTempView("t1") {
val path = new Path(dir.toURI.toString, "test.parquet")
makeParquetFileAllPrimitiveTypes(path, dictionaryEnabled = true, 100)
spark.read.parquet(path.toString).createOrReplaceTempView("t1")

// Use column references in maps to avoid constant folding
// Size now supports MapType inputs, this falls back since CreateMap
// is not yet supported natively. Use column references to avoid
// constant folding.
checkSparkAnswerAndFallbackReason(
sql("SELECT size(case when _2 < 0 then map(_8, _9) else map() end) from t1"),
"size does not support map inputs")
"map is not supported")
}
}
}

// fails with "map is not supported"
// still fails because CreateMap is not supported natively
ignore("size with map input") {
withTempDir { dir =>
withTempView("t1") {
Expand Down