Skip to content

Commit

Permalink
[SPARK-33456][SQL][TEST] Add end-to-end test for subexpression elimin…
Browse files Browse the repository at this point in the history
…ation

### What changes were proposed in this pull request?

This patch adds an end-to-end test for subexpression elimination.

### Why are the changes needed?

We have a subexpression elimination feature for expression evaluation, but we don't have end-to-end tests for it. We should have one to make sure we don't break it.

### Does this PR introduce _any_ user-facing change?

No, dev only.

### How was this patch tested?

Unit tests.

Closes #30381 from viirya/SPARK-33456.

Authored-by: Liang-Chi Hsieh <viirya@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
  • Loading branch information
viirya authored and HyukjinKwon committed Nov 16, 2020
1 parent 10105b5 commit d4cf148
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- Test for subexpression elimination.
--
-- Every query in this file repeats the same from_json(...) call several times
-- within one SELECT, so the repeated call is a candidate for subexpression
-- elimination.

-- Applies to every run of this file.
-- NOTE(review): presumably disabled so the optimizer does not rewrite the
-- repeated from_json calls before they reach expression evaluation - confirm.
--SET spark.sql.optimizer.enableJsonExpressionOptimization=false

-- Dimension 1: whole-stage code generation on / off.
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false

-- Dimension 2: generated code only / interpreted evaluation only.
--CONFIG_DIM2 spark.sql.codegen.factoryMode=CODEGEN_ONLY
--CONFIG_DIM2 spark.sql.codegen.factoryMode=NO_CODEGEN

-- Dimension 3: subexpression elimination on / off.
-- The directive needs the configuration key itself. The previous value,
-- SUBEXPRESSION_ELIMINATION_ENABLED, is the name of the Scala constant in
-- SQLConf rather than a configuration key, so it never toggled the feature.
--CONFIG_DIM3 spark.sql.subexpressionElimination.enabled=true
--CONFIG_DIM3 spark.sql.subexpressionElimination.enabled=false

-- Test data.
-- Temporary view with two string columns:
--   a: a JSON object with a numeric field "a" and a string field "b"
--   b: a JSON array of objects, each with numeric fields "a" and "b"
-- Five rows in total; one row has b = null and one row has a = null, so the
-- queries below also cover NULL input to from_json.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
('{"a":1, "b":"2"}', '[{"a": 1, "b":2}, {"a":2, "b":2}]'), ('{"a":1, "b":"2"}', null), ('{"a":2, "b":"3"}', '[{"a": 3, "b":4}, {"a":4, "b":5}]'), ('{"a":5, "b":"6"}', '[{"a": 6, "b":7}, {"a":8, "b":9}]'), (null, '[{"a": 1, "b":2}, {"a":2, "b":2}]')
AS testData(a, b);

-- Deterministic queries. Each one parses column a as struct<a:int,b:string>
-- and/or column b as array<struct<a:int,b:int>> more than once.

-- Plain projection: two field reads from the parsed object in a, and two field
-- reads from the first element of the parsed array in b.
SELECT from_json(a, 'struct<a:int,b:string>').a, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b FROM testData;

-- IF: the condition reads from_json(a).a; both branches read from_json(b)[0].a.
SELECT if(from_json(a, 'struct<a:int,b:string>').a > 1, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData;

-- IF with an isnull condition on from_json(a).a; both branches read
-- from_json(b)[0].b.
SELECT if(isnull(from_json(a, 'struct<a:int,b:string>').a), from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1, from_json(b, 'array<struct<a:int,b:int>>')[0].b) FROM testData;

-- CASE WHEN where every condition and every branch value reads from the same
-- from_json(a) call. Field b is a string, so "b + 1" and "b + 2" are computed
-- through DOUBLE casts and the column type is string (see the schema recorded
-- for this query in the golden output).
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 else from_json(a, 'struct<a:int,b:string>').b + 2 end FROM testData;

-- CASE WHEN where the conditions read from_json(a).a and all three branch
-- values read from_json(b)[0].b.
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(b, 'array<struct<a:int,b:int>>')[0].b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1 else from_json(b, 'array<struct<a:int,b:int>>')[0].b + 2 end FROM testData;

-- With non-deterministic expressions.
-- The same repeated from_json calls as above, now combined with random().
-- NOTE(review): presumably random() yields values in [0, 1), so comparing
-- "integer + random()" against an integer threshold still gives the stable
-- booleans recorded in the golden output - confirm.

-- Projection. NOTE(review): "+ + random()" in the last column is a binary plus
-- followed by a unary plus (the recorded schema shows "(+ rand())"); it looks
-- like a typo, but the golden output records this exact query text, so changing
-- it requires regenerating that output.
SELECT from_json(a, 'struct<a:int,b:string>').a + random() > 2, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b + + random() > 2 FROM testData;

-- IF with random() in the condition; both branches read from_json(b)[0].a.
SELECT if(from_json(a, 'struct<a:int,b:string>').a + random() > 5, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData;

-- CASE WHEN with random() inside every branch value; the conditions and the
-- branch values all read from the same from_json(a) call.
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b + random() > 5 when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 + random() > 2 else from_json(a, 'struct<a:int,b:string>').b + 2 + random() > 5 end FROM testData;

-- Clean up
-- Drops the temporary view created at the top of this file; IF EXISTS keeps
-- the statement from failing when the view is already gone.
DROP VIEW IF EXISTS testData;
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 10


-- !query
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
('{"a":1, "b":"2"}', '[{"a": 1, "b":2}, {"a":2, "b":2}]'), ('{"a":1, "b":"2"}', null), ('{"a":2, "b":"3"}', '[{"a": 3, "b":4}, {"a":4, "b":5}]'), ('{"a":5, "b":"6"}', '[{"a": 6, "b":7}, {"a":8, "b":9}]'), (null, '[{"a": 1, "b":2}, {"a":2, "b":2}]')
AS testData(a, b)
-- !query schema
struct<>
-- !query output



-- !query
SELECT from_json(a, 'struct<a:int,b:string>').a, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b FROM testData
-- !query schema
struct<from_json(a).a:int,from_json(a).b:string,from_json(b)[0].a:int,from_json(b)[0].b:int>
-- !query output
1 2 1 2
1 2 NULL NULL
2 3 3 4
5 6 6 7
NULL NULL 1 2


-- !query
SELECT if(from_json(a, 'struct<a:int,b:string>').a > 1, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData
-- !query schema
struct<(IF((from_json(a).a > 1), from_json(b)[0].a, (from_json(b)[0].a + 1))):int>
-- !query output
2
2
3
6
NULL


-- !query
SELECT if(isnull(from_json(a, 'struct<a:int,b:string>').a), from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1, from_json(b, 'array<struct<a:int,b:int>>')[0].b) FROM testData
-- !query schema
struct<(IF((from_json(a).a IS NULL), (from_json(b)[0].b + 1), from_json(b)[0].b)):int>
-- !query output
2
3
4
7
NULL


-- !query
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 else from_json(a, 'struct<a:int,b:string>').b + 2 end FROM testData
-- !query schema
struct<CASE WHEN (from_json(a).a > 5) THEN from_json(a).b WHEN (from_json(a).a > 4) THEN CAST((CAST(from_json(a).b AS DOUBLE) + CAST(1 AS DOUBLE)) AS STRING) ELSE CAST((CAST(from_json(a).b AS DOUBLE) + CAST(2 AS DOUBLE)) AS STRING) END:string>
-- !query output
4.0
4.0
5.0
7.0
NULL


-- !query
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(b, 'array<struct<a:int,b:int>>')[0].b when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(b, 'array<struct<a:int,b:int>>')[0].b + 1 else from_json(b, 'array<struct<a:int,b:int>>')[0].b + 2 end FROM testData
-- !query schema
struct<CASE WHEN (from_json(a).a > 5) THEN from_json(b)[0].b WHEN (from_json(a).a > 4) THEN (from_json(b)[0].b + 1) ELSE (from_json(b)[0].b + 2) END:int>
-- !query output
4
4
6
8
NULL


-- !query
SELECT from_json(a, 'struct<a:int,b:string>').a + random() > 2, from_json(a, 'struct<a:int,b:string>').b, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].b + + random() > 2 FROM testData
-- !query schema
struct<((CAST(from_json(a).a AS DOUBLE) + rand()) > CAST(2 AS DOUBLE)):boolean,from_json(a).b:string,from_json(b)[0].a:int,((CAST(from_json(b)[0].b AS DOUBLE) + (+ rand())) > CAST(2 AS DOUBLE)):boolean>
-- !query output
NULL NULL 1 true
false 2 1 true
false 2 NULL NULL
true 3 3 true
true 6 6 true


-- !query
SELECT if(from_json(a, 'struct<a:int,b:string>').a + random() > 5, from_json(b, 'array<struct<a:int,b:int>>')[0].a, from_json(b, 'array<struct<a:int,b:int>>')[0].a + 1) FROM testData
-- !query schema
struct<(IF(((CAST(from_json(a).a AS DOUBLE) + rand()) > CAST(5 AS DOUBLE)), from_json(b)[0].a, (from_json(b)[0].a + 1))):int>
-- !query output
2
2
4
6
NULL


-- !query
SELECT case when from_json(a, 'struct<a:int,b:string>').a > 5 then from_json(a, 'struct<a:int,b:string>').b + random() > 5 when from_json(a, 'struct<a:int,b:string>').a > 4 then from_json(a, 'struct<a:int,b:string>').b + 1 + random() > 2 else from_json(a, 'struct<a:int,b:string>').b + 2 + random() > 5 end FROM testData
-- !query schema
struct<CASE WHEN (from_json(a).a > 5) THEN ((CAST(from_json(a).b AS DOUBLE) + rand()) > CAST(5 AS DOUBLE)) WHEN (from_json(a).a > 4) THEN (((CAST(from_json(a).b AS DOUBLE) + CAST(1 AS DOUBLE)) + rand()) > CAST(2 AS DOUBLE)) ELSE (((CAST(from_json(a).b AS DOUBLE) + CAST(2 AS DOUBLE)) + rand()) > CAST(5 AS DOUBLE)) END:boolean>
-- !query output
NULL
false
false
true
true


-- !query
DROP VIEW IF EXISTS testData
-- !query schema
struct<>
-- !query output

0 comments on commit d4cf148

Please sign in to comment.