From c85862a0421e306324a3b048493a005356a1d98d Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Mon, 1 Jun 2026 14:45:48 -0600
Subject: [PATCH] test: add SQL file tests for RuntimeReplaceable functions accelerated by Comet

---
 .../expressions/array/array_size.sql          | 37 ++++++++++++++
 .../expressions/array/try_element_at.sql      | 51 +++++++++++++++++++
 .../expressions/conditional/nullif_zeroif.sql | 40 +++++++++++++++
 .../sql-tests/expressions/misc/equal_null.sql | 44 ++++++++++++++++
 .../sql-tests/expressions/misc/uniform.sql    | 38 ++++++++++++++
 .../expressions/string/collation.sql          | 33 ++++++++++++
 .../expressions/string/to_binary.sql          | 41 +++++++++++++++
 7 files changed, 284 insertions(+)
 create mode 100644 spark/src/test/resources/sql-tests/expressions/array/array_size.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/array/try_element_at.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/conditional/nullif_zeroif.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/misc/equal_null.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/misc/uniform.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/string/collation.sql
 create mode 100644 spark/src/test/resources/sql-tests/expressions/string/to_binary.sql

diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_size.sql b/spark/src/test/resources/sql-tests/expressions/array/array_size.sql
new file mode 100644
index 0000000000..8642cd7d27
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/array/array_size.sql
@@ -0,0 +1,37 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- array_size returns NULL for NULL input (unlike size which returns -1 in legacy mode).
+
+statement
+CREATE TABLE test_array_size(arr array<int>) USING parquet
+
+statement
+INSERT INTO test_array_size VALUES (array(1, 2, 3)), (array(10)), (array()), (NULL)
+
+-- non-null arrays
+query
+SELECT array_size(arr) FROM test_array_size WHERE arr IS NOT NULL
+
+-- literal arguments: non-null
+query
+SELECT array_size(array(1, 2, 3)), array_size(array(10)), array_size(array())
+
+-- NULL input: Spark returns NULL; Comet bug returns -1
+-- tracked in https://github.com/apache/datafusion-comet/issues/4560
+query ignore(https://github.com/apache/datafusion-comet/issues/4560)
+SELECT array_size(CAST(NULL AS ARRAY<INT>))
diff --git a/spark/src/test/resources/sql-tests/expressions/array/try_element_at.sql b/spark/src/test/resources/sql-tests/expressions/array/try_element_at.sql
new file mode 100644
index 0000000000..35fd54c036
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/array/try_element_at.sql
@@ -0,0 +1,51 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- try_element_at lowers to ElementAt(failOnError=false), so out-of-bounds returns NULL.
+
+statement
+CREATE TABLE test_try_element_at(arr array<int>, m map<string, int>) USING parquet
+
+statement
+INSERT INTO test_try_element_at VALUES
+  (array(10, 20, 30), map('a', 1, 'b', 2)),
+  (array(99), map('x', 99)),
+  (NULL, NULL)
+
+-- array input: in-bounds access
+query
+SELECT try_element_at(arr, 1) FROM test_try_element_at
+
+-- array input: last element via negative index
+query
+SELECT try_element_at(arr, -1) FROM test_try_element_at
+
+-- array input: out-of-bounds returns NULL (no exception)
+query
+SELECT try_element_at(arr, 100) FROM test_try_element_at
+
+-- NULL array input
+query
+SELECT try_element_at(CAST(NULL AS ARRAY<INT>), 1)
+
+-- literal array arguments: same codegen bug as element_at with literal arrays
+query ignore(Spark codegen bug with literal element_at when constant folding is disabled)
+SELECT try_element_at(array(10, 20, 30), 1), try_element_at(array(10, 20, 30), 99)
+
+-- map input falls back to Spark
+query spark_answer_only
+SELECT try_element_at(m, 'a'), try_element_at(m, 'missing') FROM test_try_element_at
diff --git a/spark/src/test/resources/sql-tests/expressions/conditional/nullif_zeroif.sql b/spark/src/test/resources/sql-tests/expressions/conditional/nullif_zeroif.sql
new file mode 100644
index 0000000000..68c422f94c
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/conditional/nullif_zeroif.sql
@@ -0,0 +1,40 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- MinSparkVersion: 4.0
+
+statement
+CREATE TABLE test_nullif_zeroif(v int) USING parquet
+
+statement
+INSERT INTO test_nullif_zeroif VALUES (0), (1), (42), (-5), (NULL)
+
+-- nullifzero: returns NULL when input is 0, otherwise returns input
+query
+SELECT nullifzero(v) FROM test_nullif_zeroif
+
+-- zeroifnull: returns 0 when input is NULL, otherwise returns input
+query
+SELECT zeroifnull(v) FROM test_nullif_zeroif
+
+-- literal arguments for nullifzero
+query
+SELECT nullifzero(0), nullifzero(1), nullifzero(-5), nullifzero(NULL)
+
+-- literal arguments for zeroifnull
+query
+SELECT zeroifnull(0), zeroifnull(1), zeroifnull(-5), zeroifnull(NULL)
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/equal_null.sql b/spark/src/test/resources/sql-tests/expressions/misc/equal_null.sql
new file mode 100644
index 0000000000..7ce2b34646
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/equal_null.sql
@@ -0,0 +1,44 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- MinSparkVersion: 3.4
+
+statement
+CREATE TABLE test_equal_null(a int, b int) USING parquet
+
+statement
+INSERT INTO test_equal_null VALUES (1, 1), (1, 2), (NULL, NULL), (NULL, 1), (1, NULL)
+
+-- equal_null: same as <=> (null-safe equality)
+query
+SELECT equal_null(a, b) FROM test_equal_null
+
+-- literal arguments: both NULL
+query
+SELECT equal_null(NULL, NULL)
+
+-- literal arguments: one NULL
+query
+SELECT equal_null(NULL, 1), equal_null(1, NULL)
+
+-- literal arguments: equal values
+query
+SELECT equal_null(3, 3)
+
+-- literal arguments: unequal values
+query
+SELECT equal_null(3, 4)
diff --git a/spark/src/test/resources/sql-tests/expressions/misc/uniform.sql b/spark/src/test/resources/sql-tests/expressions/misc/uniform.sql
new file mode 100644
index 0000000000..c2f8e27750
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/misc/uniform.sql
@@ -0,0 +1,38 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- MinSparkVersion: 4.0
+
+-- uniform(lo, hi, seed) returns a uniformly distributed random integer in [lo, hi].
+-- All arguments must be literals; Spark rejects column arguments.
+-- A fixed seed produces a deterministic result.
+
+-- integer range with seed 0
+query
+SELECT uniform(1, 10, 0)
+
+-- same bounds (lo == hi) always returns that value
+query
+SELECT uniform(5, 5, 42)
+
+-- single-valued range at 0
+query
+SELECT uniform(0, 0, 1)
+
+-- wider range with different seed
+query
+SELECT uniform(100, 200, 12345)
diff --git a/spark/src/test/resources/sql-tests/expressions/string/collation.sql b/spark/src/test/resources/sql-tests/expressions/string/collation.sql
new file mode 100644
index 0000000000..e50cb5d7a7
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/string/collation.sql
@@ -0,0 +1,33 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- MinSparkVersion: 4.0
+
+-- collation(expr) returns the collation name of a string expression.
+-- It folds to a string literal at planning time, so Comet evaluates it natively.
+
+-- default collation on a string literal
+query
+SELECT collation('abc')
+
+-- collation of an explicit UTF8_BINARY string
+query
+SELECT collation('hello' COLLATE UTF8_BINARY)
+
+-- collation of a NULL string
+query
+SELECT collation(CAST(NULL AS STRING))
diff --git a/spark/src/test/resources/sql-tests/expressions/string/to_binary.sql b/spark/src/test/resources/sql-tests/expressions/string/to_binary.sql
new file mode 100644
index 0000000000..67837919b3
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/string/to_binary.sql
@@ -0,0 +1,41 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- to_binary with 'hex' format lowers to Unhex, which Comet accelerates.
+-- to_binary with 'utf-8' and 'base64' formats fall back to Spark.
+
+statement
+CREATE TABLE test_to_binary(s string) USING parquet
+
+statement
+INSERT INTO test_to_binary VALUES ('537061726B'), ('41'), ('0A1B'), (''), (NULL)
+
+-- hex format: accelerated via Unhex
+query
+SELECT to_binary(s, 'hex') FROM test_to_binary
+
+-- literal hex arguments
+query
+SELECT to_binary('41', 'hex'), to_binary('0A1B', 'hex'), to_binary('', 'hex'), to_binary(NULL, 'hex')
+
+-- utf-8 format falls back to Spark
+query spark_answer_only
+SELECT to_binary(s, 'utf-8') FROM test_to_binary
+
+-- base64 format falls back to Spark
+query spark_answer_only
+SELECT to_binary(s, 'base64') FROM test_to_binary