From 1cc3ce114f60c9ea6cbd640c14e9a66b4e2105b2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 31 Jan 2026 15:47:36 -0700 Subject: [PATCH] chore: Migrate concat and concat_ws tests from Scala to SQL test framework Co-Authored-By: Claude Opus 4.5 --- .../expressions/array/array_concat.sql | 42 +++++++++++++ .../sql-tests/expressions/string/concat.sql | 41 ++++++++++++- .../expressions/string/concat_ws.sql | 10 ++++ .../apache/comet/CometExpressionSuite.scala | 60 ------------------- .../comet/CometStringExpressionSuite.scala | 13 ---- 5 files changed, 91 insertions(+), 75 deletions(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/array/array_concat.sql diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_concat.sql b/spark/src/test/resources/sql-tests/expressions/array/array_concat.sql new file mode 100644 index 0000000000..9a3a4a8138 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/array/array_concat.sql @@ -0,0 +1,42 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +-- ConfigMatrix: parquet.enable.dictionary=false,true + +-- migrated from CometExpressionSuite "test concat function - arrays" +-- https://github.com/apache/datafusion-comet/issues/2647 + +statement +CREATE TABLE test_array_concat(c1 array<int>, c2 array<int>, c3 array<int>, c4 array<int>, c5 array<int>) USING parquet + +statement +INSERT INTO test_array_concat VALUES (array(0, 1), array(2, 3), array(), array(null), null), (array(1, 2), array(3, 4), array(), array(null), null), (array(2, 3), array(4, 5), array(), array(null), null) + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2) AS x FROM test_array_concat + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c1) AS x FROM test_array_concat + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2, c3) AS x FROM test_array_concat + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2, c3, c5) AS x FROM test_array_concat + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM test_array_concat diff --git a/spark/src/test/resources/sql-tests/expressions/string/concat.sql b/spark/src/test/resources/sql-tests/expressions/string/concat.sql index fcf2416bc2..a1022f73d0 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/concat.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/concat.sql @@ -18,10 +18,10 @@ -- ConfigMatrix: parquet.enable.dictionary=false,true statement -CREATE TABLE test_concat(a string, b string, c string) USING parquet +CREATE TABLE test_concat(a string, b string, c string, d string) USING parquet statement -INSERT INTO test_concat VALUES ('hello', ' ', 'world'), ('', '', ''), (NULL, 'b', 'c'), ('a', NULL, 'c'), (NULL, NULL, NULL) +INSERT INTO test_concat VALUES ('hello', ' ', 'world', NULL), ('', '', '', NULL), (NULL, 'b', 'c', NULL), ('a', NULL, 'c', NULL), (NULL, 
NULL, NULL, NULL) query SELECT concat(a, b, c) FROM test_concat @@ -33,6 +33,43 @@ SELECT a || b || c FROM test_concat query SELECT concat(a, ' ', c) FROM test_concat +-- migrated from CometExpressionSuite "test concat function - strings" +-- two arguments +query +SELECT concat(a, b) FROM test_concat + +-- same column twice +query +SELECT concat(a, a) FROM test_concat + +-- four arguments with null column +query +SELECT concat(a, b, c, d) FROM test_concat + +-- nested concat +query +SELECT concat(concat(a, b, c), concat(a, c)) FROM test_concat + -- literal + literal + literal query SELECT concat('hello', ' ', 'world'), concat('', '', ''), concat(NULL, 'b', 'c') + +-- migrated from CometExpressionSuite "test concat function - binary" +-- https://github.com/apache/datafusion-comet/issues/2647 +statement +CREATE TABLE test_concat_binary USING parquet AS SELECT cast(uuid() as binary) c1, cast(uuid() as binary) c2, cast(uuid() as binary) c3, cast(uuid() as binary) c4, cast(null as binary) c5 FROM range(10) + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2) AS x FROM test_concat_binary + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c1) AS x FROM test_concat_binary + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2, c3) AS x FROM test_concat_binary + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(c1, c2, c3, c5) AS x FROM test_concat_binary + +query expect_fallback(CONCAT supports only string input parameters) +SELECT concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM test_concat_binary diff --git a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql index fd277fd7c4..4a3df68965 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql +++ 
b/spark/src/test/resources/sql-tests/expressions/string/concat_ws.sql @@ -32,6 +32,16 @@ SELECT concat_ws('', a, b, c) FROM test_concat_ws query SELECT concat_ws(NULL, a, b, c) FROM test_concat_ws +-- migrated from CometStringExpressionSuite "string concat_ws" +statement +CREATE TABLE names(id int, first_name varchar(20), middle_initial char(1), last_name varchar(20)) USING parquet + +statement +INSERT INTO names VALUES(1, 'James', 'B', 'Taylor'), (2, 'Smith', 'C', 'Davis'), (3, NULL, NULL, NULL), (4, 'Smith', 'C', 'Davis') + +query +SELECT concat_ws(' ', first_name, middle_initial, last_name) FROM names + -- literal + literal + literal query ignore(https://github.com/apache/datafusion-comet/issues/3339) SELECT concat_ws(',', 'hello', 'world'), concat_ws(',', '', ''), concat_ws(',', NULL, 'b', 'c'), concat_ws(NULL, 'a', 'b') diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 2678f1484b..5a22583ae0 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -39,7 +39,6 @@ import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE import org.apache.spark.sql.types._ import org.apache.comet.CometSparkSessionExtensions.isSpark40Plus -import org.apache.comet.serde.CometConcat import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator} class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { @@ -3124,65 +3123,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("test concat function - strings") { - withTable("t1") { - sql( - "create table t1 using parquet as select uuid() c1, uuid() c2, uuid() c3, uuid() c4, cast(null as string) c5 from range(10)") - checkSparkAnswerAndOperator("select concat(c1, c2) AS x FROM t1") - checkSparkAnswerAndOperator("select concat(c1, c1) AS x FROM t1") - 
checkSparkAnswerAndOperator("select concat(c1, c2, c3) AS x FROM t1") - checkSparkAnswerAndOperator("select concat(c1, c2, c3, c5) AS x FROM t1") - checkSparkAnswerAndOperator( - "select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1") - } - } - - // https://github.com/apache/datafusion-comet/issues/2647 - test("test concat function - arrays") { - withTable("t1") { - sql( - "create table t1 using parquet as select array(id, id+1) c1, array(id+2, id+3) c2, CAST(array() AS array) c3, CAST(array(null) as array) c4, cast(null as array) c5 from range(10)") - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c1) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2, c3) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2, c3, c5) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1", - CometConcat.unsupportedReason) - } - } - - // https://github.com/apache/datafusion-comet/issues/2647 - test("test concat function - binary") { - withTable("t1") { - sql( - "create table t1 using parquet as select cast(uuid() as binary) c1, cast(uuid() as binary) c2, cast(uuid() as binary) c3, cast(uuid() as binary) c4, cast(null as binary) c5 from range(10)") - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c1) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2, c3) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select concat(c1, c2, c3, c5) AS x FROM t1", - CometConcat.unsupportedReason) - checkSparkAnswerAndFallbackReason( - "select 
concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1", - CometConcat.unsupportedReason) - } - } - // https://github.com/apache/datafusion-comet/issues/2813 test("make decimal using DataFrame API - integer") { withTable("t1") { diff --git a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala index 662534b3cd..2a2932c643 100644 --- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala @@ -260,19 +260,6 @@ class CometStringExpressionSuite extends CometTestBase { } } - test("string concat_ws") { - val table = "names" - withTable(table) { - sql( - s"create table $table(id int, first_name varchar(20), middle_initial char(1), last_name varchar(20)) using parquet") - sql( - s"insert into $table values(1, 'James', 'B', 'Taylor'), (2, 'Smith', 'C', 'Davis')," + - " (3, NULL, NULL, NULL), (4, 'Smith', 'C', 'Davis')") - checkSparkAnswerAndOperator( - s"SELECT concat_ws(' ', first_name, middle_initial, last_name) FROM $table") - } - } - test("string repeat") { val table = "names" withTable(table) {