Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- array_size returns NULL for NULL input (unlike size which returns -1 in legacy mode).

-- Test table with a single array<int> column, stored as parquet.
statement
CREATE TABLE test_array_size(arr array<int>) USING parquet

-- Rows cover a 3-element array, a 1-element array, an empty array, and a NULL array.
statement
INSERT INTO test_array_size VALUES (array(1, 2, 3)), (array(10)), (array()), (NULL)

-- non-null arrays (column input): the WHERE clause filters out the NULL row, so this
-- query only sees the 3-element, 1-element, and empty arrays.
-- NOTE(review): no query reads the NULL row through the column path; presumably it is
-- excluded because of the NULL-input issue referenced at the end of this file. Confirm.
query
SELECT array_size(arr) FROM test_array_size WHERE arr IS NOT NULL

-- literal arguments: non-null (same three array shapes as the table rows)
query
SELECT array_size(array(1, 2, 3)), array_size(array(10)), array_size(array())

-- NULL input: Spark returns NULL, but Comet currently returns -1 (bug).
-- The query is ignored until the bug is fixed; it is
-- tracked in https://github.com/apache/datafusion-comet/issues/4560
query ignore(https://github.com/apache/datafusion-comet/issues/4560)
SELECT array_size(CAST(NULL AS ARRAY<INT>))
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- try_element_at lowers to ElementAt(failOnError=false), so out-of-bounds returns NULL.

-- Test table with one array<int> column and one map<string, int> column.
statement
CREATE TABLE test_try_element_at(arr array<int>, m map<string, int>) USING parquet

-- Rows: a 3-element array with a 2-entry map, a 1-element array with a 1-entry map,
-- and a row where both columns are NULL.
statement
INSERT INTO test_try_element_at VALUES
(array(10, 20, 30), map('a', 1, 'b', 2)),
(array(99), map('x', 99)),
(NULL, NULL)

-- array input: in-bounds access (both non-NULL arrays have an element at index 1).
-- There is no WHERE filter, so the NULL-array row is evaluated by this query as well.
query
SELECT try_element_at(arr, 1) FROM test_try_element_at

-- array input: last element via negative index
query
SELECT try_element_at(arr, -1) FROM test_try_element_at

-- array input: out-of-bounds returns NULL (no exception);
-- index 100 is past the end of every array in the table
query
SELECT try_element_at(arr, 100) FROM test_try_element_at

-- NULL array input (typed NULL literal rather than a column value)
query
SELECT try_element_at(CAST(NULL AS ARRAY<INT>), 1)

-- literal array arguments: same codegen bug as element_at with literal arrays.
-- Covers one in-bounds index (1) and one out-of-bounds index (99); ignored for the
-- reason given in the directive below.
query ignore(Spark codegen bug with literal element_at when constant folding is disabled)
SELECT try_element_at(array(10, 20, 30), 1), try_element_at(array(10, 20, 30), 99)

-- map input falls back to Spark.
-- Key 'a' exists only in the first row's map; key 'missing' exists in none of them.
query spark_answer_only
SELECT try_element_at(m, 'a'), try_element_at(m, 'missing') FROM test_try_element_at
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- MinSparkVersion: 4.0

-- Test table with a single int column, shared by the nullifzero and zeroifnull queries.
statement
CREATE TABLE test_nullif_zeroif(v int) USING parquet

-- Rows cover zero, positive values (1, 42), a negative value (-5), and NULL.
statement
INSERT INTO test_nullif_zeroif VALUES (0), (1), (42), (-5), (NULL)

-- nullifzero: returns NULL when input is 0, otherwise returns input
-- (column input; the 0 row is the one that exercises the NULL result)
query
SELECT nullifzero(v) FROM test_nullif_zeroif

-- zeroifnull: returns 0 when input is NULL, otherwise returns input
-- (column input; the NULL row is the one that exercises the 0 result)
query
SELECT zeroifnull(v) FROM test_nullif_zeroif

-- literal arguments for nullifzero: zero, positive, negative, and an untyped NULL literal
query
SELECT nullifzero(0), nullifzero(1), nullifzero(-5), nullifzero(NULL)

-- literal arguments for zeroifnull: zero, positive, negative, and an untyped NULL literal
query
SELECT zeroifnull(0), zeroifnull(1), zeroifnull(-5), zeroifnull(NULL)
44 changes: 44 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/misc/equal_null.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- MinSparkVersion: 3.4

-- Test table with two int columns that are compared against each other.
statement
CREATE TABLE test_equal_null(a int, b int) USING parquet

-- Rows cover: equal values, unequal values, both NULL, only the left side NULL,
-- and only the right side NULL.
statement
INSERT INTO test_equal_null VALUES (1, 1), (1, 2), (NULL, NULL), (NULL, 1), (1, NULL)

-- equal_null: same as <=> (null-safe equality); column inputs over all five row shapes
query
SELECT equal_null(a, b) FROM test_equal_null

-- literal arguments: both NULL (untyped NULL literals)
query
SELECT equal_null(NULL, NULL)

-- literal arguments: one NULL (checked on the left side and on the right side)
query
SELECT equal_null(NULL, 1), equal_null(1, NULL)

-- literal arguments: equal values
query
SELECT equal_null(3, 3)

-- literal arguments: unequal values
query
SELECT equal_null(3, 4)
38 changes: 38 additions & 0 deletions spark/src/test/resources/sql-tests/expressions/misc/uniform.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- MinSparkVersion: 4.0

-- uniform(lo, hi, seed) returns a uniformly distributed random integer in [lo, hi].
-- All arguments must be literals; Spark rejects column arguments.
-- A fixed seed produces a deterministic result.

-- No test table is created: every query below passes literal arguments only.

-- integer range with seed 0
query
SELECT uniform(1, 10, 0)

-- same bounds (lo == hi) always returns that value,
-- so the expected result here is 5 regardless of the seed
query
SELECT uniform(5, 5, 42)

-- single-valued range at 0 (lo == hi == 0)
query
SELECT uniform(0, 0, 1)

-- wider range with different seed
query
SELECT uniform(100, 200, 12345)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- MinSparkVersion: 4.0

-- collation(expr) returns the collation name of a string expression.
-- It folds to a string literal at planning time, so Comet evaluates it natively.

-- No test table is created: every query below passes a literal or a typed NULL.

-- default collation on a string literal (no COLLATE clause)
query
SELECT collation('abc')

-- collation of an explicit UTF8_BINARY string (set via a COLLATE clause)
query
SELECT collation('hello' COLLATE UTF8_BINARY)

-- collation of a NULL string (NULL cast to STRING so the argument has a string type)
query
SELECT collation(CAST(NULL AS STRING))
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.

-- to_binary with 'hex' format lowers to Unhex, which Comet accelerates.
-- to_binary with the 'utf-8' or 'base64' format falls back to Spark.

-- Test table with a single string column holding the inputs to convert.
statement
CREATE TABLE test_to_binary(s string) USING parquet

-- Rows: '537061726B' (hex for the ASCII text 'Spark'), '41' (hex for 'A'),
-- '0A1B' (two bytes, 0x0A and 0x1B), the empty string, and NULL.
-- The same rows are reused as input for the utf-8 and base64 queries below.
statement
INSERT INTO test_to_binary VALUES ('537061726B'), ('41'), ('0A1B'), (''), (NULL)

-- hex format: accelerated via Unhex (column input)
query
SELECT to_binary(s, 'hex') FROM test_to_binary

-- literal hex arguments: non-empty values, the empty string, and an untyped NULL literal
query
SELECT to_binary('41', 'hex'), to_binary('0A1B', 'hex'), to_binary('', 'hex'), to_binary(NULL, 'hex')

-- utf-8 format falls back to Spark
query spark_answer_only
SELECT to_binary(s, 'utf-8') FROM test_to_binary

-- base64 format falls back to Spark
query spark_answer_only
SELECT to_binary(s, 'base64') FROM test_to_binary
Loading