-
Notifications
You must be signed in to change notification settings - Fork 28.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-33456][SQL][TEST] Add end-to-end test for subexpression elimination
### What changes were proposed in this pull request? This patch proposes to add an end-to-end test for subexpression elimination. ### Why are the changes needed? We have a subexpression elimination feature for expression evaluation, but we don't have end-to-end tests for the feature. We should have one to make sure we don't break it. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Unit tests. Closes #30381 from viirya/SPARK-33456. Authored-by: Liang-Chi Hsieh <viirya@gmail.com> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
- Loading branch information
1 parent
10105b5
commit d4cf148
Showing
2 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
sql/core/src/test/resources/sql-tests/inputs/subexp-elimination.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
-- Test for subexpression elimination. | ||
|
||
--SET spark.sql.optimizer.enableJsonExpressionOptimization=false | ||
|
||
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true | ||
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false | ||
|
||
--CONFIG_DIM2 spark.sql.codegen.factoryMode=CODEGEN_ONLY | ||
--CONFIG_DIM2 spark.sql.codegen.factoryMode=NO_CODEGEN | ||
|
||
-- Run every query with subexpression elimination both on and off. | ||
-- The directive must name the SQL conf key itself; the Scala constant name | ||
-- SUBEXPRESSION_ELIMINATION_ENABLED is not a conf key, so it never toggled the feature. | ||
--CONFIG_DIM3 spark.sql.subexpressionElimination.enabled=true | ||
--CONFIG_DIM3 spark.sql.subexpressionElimination.enabled=false | ||
|
||
-- Test data. | ||
-- Temporary view with two string columns: a holds a JSON object ({"a":..., "b":...}) | ||
-- and b holds a JSON array of such objects. One row has a null b and one row has a null a. | ||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES | ||
('{"a":1, "b":"2"}', '[{"a": 1, "b":2}, {"a":2, "b":2}]'), ('{"a":1, "b":"2"}', null), ('{"a":2, "b":"3"}', '[{"a": 3, "b":4}, {"a":4, "b":5}]'), ('{"a":5, "b":"6"}', '[{"a": 6, "b":7}, {"a":8, "b":9}]'), (null, '[{"a": 1, "b":2}, {"a":2, "b":2}]') | ||
AS testData(a, b); | ||
|
||
-- Plain projection: from_json(a, ...) and from_json(b, ...) each appear twice, | ||
-- so the same parse expression is repeated within one select list. | ||
SELECT from_json(a, 'struct<a:int,b:string>').a, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b FROM testData; | ||
|
||
-- if() whose condition reads from_json(a).a; both branches read the same from_json(b)[0].a. | ||
SELECT if(from_json(a, 'struct<a:int,b:string>').a > 1, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData; | ||
|
||
-- if() with an isnull() condition on from_json(a).a; both branches read the same from_json(b)[0].b. | ||
SELECT if(isnull(from_json(a, 'struct<a:int,b:string>').a), from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1, from_json(b, 'array<struct<a:int,b:int>>')[0].b) FROM testData; | ||
|
||
-- CASE WHEN in which every condition and every branch value reads from_json(a). | ||
-- Field b is declared as string, so b + 1 and b + 2 rely on an implicit cast | ||
-- (the recorded schema shows CAST(... AS DOUBLE) and a string result). | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 else from_json(a, 'struct<a:int,b:string>').b + 2 end FROM testData; | ||
|
||
-- CASE WHEN whose conditions read from_json(a).a while all branch values read from_json(b)[0].b. | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(b, 'array<struct<a:int,b:int>>')[0].b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1 else from_json(b, 'array<struct<a:int,b:int>>')[0].b + 2 end FROM testData; | ||
|
||
-- With non-deterministic expressions. | ||
-- Projection mixing the repeated from_json calls with unseeded random(). | ||
-- NOTE(review): `+ + random()` in the last column parses as binary plus followed by unary plus | ||
-- (the recorded schema shows `(+ rand())`); this looks like a typo. Removing the extra `+` | ||
-- would change the recorded column name, so the golden file must be regenerated with it. | ||
-- NOTE(review): the recorded output presumably stays stable because random() adds less than 1 | ||
-- to integer values compared against integer thresholds - confirm. | ||
SELECT from_json(a, 'struct<a:int,b:string>').a + random() > 2, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b + + random() > 2 FROM testData; | ||
|
||
-- if() whose condition adds random() to from_json(a).a; both branches read the same from_json(b)[0].a. | ||
SELECT if(from_json(a, 'struct<a:int,b:string>').a + random() > 5, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData; | ||
|
||
-- CASE WHEN with deterministic conditions on from_json(a).a; every branch value adds random() | ||
-- to from_json(a).b and compares the sum with a constant, yielding a boolean. | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b + random() > 5 when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 + random() > 2 else from_json(a, 'struct<a:int,b:string>').b + 2 + random() > 5 end FROM testData; | ||
|
||
-- Clean up | ||
-- Drop the testData temporary view; IF EXISTS keeps this from failing when the view is absent. | ||
DROP VIEW IF EXISTS testData; |
116 changes: 116 additions & 0 deletions
116
sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 10 | ||
|
||
|
||
-- !query | ||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES | ||
('{"a":1, "b":"2"}', '[{"a": 1, "b":2}, {"a":2, "b":2}]'), ('{"a":1, "b":"2"}', null), ('{"a":2, "b":"3"}', '[{"a": 3, "b":4}, {"a":4, "b":5}]'), ('{"a":5, "b":"6"}', '[{"a": 6, "b":7}, {"a":8, "b":9}]'), (null, '[{"a": 1, "b":2}, {"a":2, "b":2}]') | ||
AS testData(a, b) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT from_json(a, 'struct<a:int,b:string>').a, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b FROM testData | ||
-- !query schema | ||
struct<from_json(a).a:int,from_json(a).b:string,from_json(b)[0].a:int,from_json(b)[0].b:int> | ||
-- !query output | ||
1 2 1 2 | ||
1 2 NULL NULL | ||
2 3 3 4 | ||
5 6 6 7 | ||
NULL NULL 1 2 | ||
|
||
|
||
-- !query | ||
SELECT if(from_json(a, 'struct<a:int,b:string>').a > 1, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData | ||
-- !query schema | ||
struct<(IF((from_json(a).a > 1), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> | ||
-- !query output | ||
2 | ||
2 | ||
3 | ||
6 | ||
NULL | ||
|
||
|
||
-- !query | ||
SELECT if(isnull(from_json(a, 'struct<a:int,b:string>').a), from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1, from_json(b, 'array<struct<a:int,b:int>>')[0].b) FROM testData | ||
-- !query schema | ||
struct<(IF((from_json(a).a IS NULL), (from_json(b)[0].b + 1), from_json(b)[0].b)):int> | ||
-- !query output | ||
2 | ||
3 | ||
4 | ||
7 | ||
NULL | ||
|
||
|
||
-- !query | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 else from_json(a, 'struct<a:int,b:string>').b + 2 end FROM testData | ||
-- !query schema | ||
struct<CASE WHEN (from_json(a).a > 5) THEN from_json(a).b WHEN (from_json(a).a > 4) THEN CAST((CAST(from_json(a).b AS DOUBLE) + CAST(1 AS DOUBLE)) AS STRING) ELSE CAST((CAST(from_json(a).b AS DOUBLE) + CAST(2 AS DOUBLE)) AS STRING) END:string> | ||
-- !query output | ||
4.0 | ||
4.0 | ||
5.0 | ||
7.0 | ||
NULL | ||
|
||
|
||
-- !query | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(b, 'array<struct<a:int,b:int>>')[0].b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1 else from_json(b, 'array<struct<a:int,b:int>>')[0].b + 2 end FROM testData | ||
-- !query schema | ||
struct<CASE WHEN (from_json(a).a > 5) THEN from_json(b)[0].b WHEN (from_json(a).a > 4) THEN (from_json(b)[0].b + 1) ELSE (from_json(b)[0].b + 2) END:int> | ||
-- !query output | ||
4 | ||
4 | ||
6 | ||
8 | ||
NULL | ||
|
||
|
||
-- !query | ||
SELECT from_json(a, 'struct<a:int,b:string>').a + random() > 2, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b + + random() > 2 FROM testData | ||
-- !query schema | ||
struct<((CAST(from_json(a).a AS DOUBLE) + rand()) > CAST(2 AS DOUBLE)):boolean,from_json(a).b:string,from_json(b)[0].a:int,((CAST(from_json(b)[0].b AS DOUBLE) + (+ rand())) > CAST(2 AS DOUBLE)):boolean> | ||
-- !query output | ||
NULL NULL 1 true | ||
false 2 1 true | ||
false 2 NULL NULL | ||
true 3 3 true | ||
true 6 6 true | ||
|
||
|
||
-- !query | ||
SELECT if(from_json(a, 'struct<a:int,b:string>').a + random() > 5, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData | ||
-- !query schema | ||
struct<(IF(((CAST(from_json(a).a AS DOUBLE) + rand()) > CAST(5 AS DOUBLE)), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> | ||
-- !query output | ||
2 | ||
2 | ||
4 | ||
6 | ||
NULL | ||
|
||
|
||
-- !query | ||
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b + random() > 5 when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 + random() > 2 else from_json(a, 'struct<a:int,b:string>').b + 2 + random() > 5 end FROM testData | ||
-- !query schema | ||
struct<CASE WHEN (from_json(a).a > 5) THEN ((CAST(from_json(a).b AS DOUBLE) + rand()) > CAST(5 AS DOUBLE)) WHEN (from_json(a).a > 4) THEN (((CAST(from_json(a).b AS DOUBLE) + CAST(1 AS DOUBLE)) + rand()) > CAST(2 AS DOUBLE)) ELSE (((CAST(from_json(a).b AS DOUBLE) + CAST(2 AS DOUBLE)) + rand()) > CAST(5 AS DOUBLE)) END:boolean> | ||
-- !query output | ||
NULL | ||
false | ||
false | ||
true | ||
true | ||
|
||
|
||
-- !query | ||
DROP VIEW IF EXISTS testData | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|