Skip to content

Commit

Permalink
add window.sql
Browse files Browse the repository at this point in the history
  • Loading branch information
jiangxb1987 committed Jul 10, 2017
1 parent 680b33f commit 2e3836c
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 0 deletions.
53 changes: 53 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/window.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- Test data: a nine-row temporary view (val, cate) used by every query in this file.
-- The rows include a duplicated pair (1, "a"), NULLs in val, NULLs in cate and one
-- all-NULL row, which gives the window queries below ties and NULLs to work with.
-- The double-quoted tokens are string values for the cate column.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, "a"), (3, null), (null, null)
AS testData(val, cate);

-- RowsBetween: frames measured in physical rows relative to the current row.
-- Shorthand form with a single boundary: the frame holds only the current row,
-- so count(val) is 1 for a non-NULL val and 0 for a NULL val.
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData;
-- Explicit form: from the first row of the partition through one row after the current row.
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData;

-- RangeBetween: frames measured by the value of the ORDER BY expression (val),
-- not by row position, so rows tied on val always fall into the same frame.
-- Shorthand form with a single boundary: rows whose val is at most 1 below the
-- current row's val, up to and including the current row's peers.
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData;
-- Explicit form: rows whose val lies between the current val and the current val + 1.
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData;

-- RangeBetween with reverse OrderBy: same frame clause as the previous query, but
-- the ordering is descending, so "1 FOLLOWING" now reaches rows whose val is up to
-- 1 smaller than the current row's val (e.g. for val = 2 the frame covers 2 and 1).
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData;

-- Window functions: one statement that applies aggregate, value, ranking and
-- distribution functions over the same window (PARTITION BY cate ORDER BY val).
-- No frame clause is written, so every function runs with the frame the engine
-- assigns by default; the recorded schema in window.sql.out shows which frame
-- each function ends up with.
-- Notes on the select list:
--   * mean(val) and avg(val) are both listed; the recorded schema names both avg(...).
--   * first_value / last_value are called with one argument and with an explicit
--     boolean second argument; the recorded schema shows the one-argument form
--     resolving to the same expression as passing false.
--   * rank, dense_rank, cume_dist, percent_rank, ntile and row_number take no
--     column argument and depend only on the window ordering.
SELECT val, cate,
max(val) OVER(PARTITION BY cate ORDER BY val),
min(val) OVER(PARTITION BY cate ORDER BY val),
mean(val) OVER(PARTITION BY cate ORDER BY val),
count(val) OVER(PARTITION BY cate ORDER BY val),
sum(val) OVER(PARTITION BY cate ORDER BY val),
avg(val) OVER(PARTITION BY cate ORDER BY val),
stddev(val) OVER(PARTITION BY cate ORDER BY val),
first_value(val) OVER(PARTITION BY cate ORDER BY val),
first_value(val, true) OVER(PARTITION BY cate ORDER BY val),
first_value(val, false) OVER(PARTITION BY cate ORDER BY val),
last_value(val) OVER(PARTITION BY cate ORDER BY val),
last_value(val, true) OVER(PARTITION BY cate ORDER BY val),
last_value(val, false) OVER(PARTITION BY cate ORDER BY val),
rank() OVER(PARTITION BY cate ORDER BY val),
dense_rank() OVER(PARTITION BY cate ORDER BY val),
cume_dist() OVER(PARTITION BY cate ORDER BY val),
percent_rank() OVER(PARTITION BY cate ORDER BY val),
ntile(2) OVER(PARTITION BY cate ORDER BY val),
row_number() OVER(PARTITION BY cate ORDER BY val),
var_pop(val) OVER(PARTITION BY cate ORDER BY val),
var_samp(val) OVER(PARTITION BY cate ORDER BY val),
approx_count_distinct(val) OVER(PARTITION BY cate ORDER BY val)
FROM testData;

-- Null inputs: the aggregate argument is an untyped NULL literal rather than a column.
-- The recorded result is NULL for every row.
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData;

-- OrderBy not specified: negative test. row_number() is used over a window that has
-- a PARTITION BY but no ORDER BY; the recorded expectation for this statement is an
-- AnalysisException asking for an ORDER BY clause, not a result set.
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData;

-- Over clause is empty: no PARTITION BY, ORDER BY or frame, so the window spans the
-- whole input and every row receives the same sum(val) and avg(val) computed over
-- all non-NULL val values.
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData;
175 changes: 175 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/window.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 10


-- !query 0
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, "a"), (3, null), (null, null)
AS testData(val, cate)
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData
-- !query 1 schema
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND CURRENT ROW):bigint>
-- !query 1 output
1 a 1
1 a 1
1 b 1
2 a 1
2 b 1
3 NULL 1
3 b 1
NULL NULL 0
NULL a 0


-- !query 2
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData
-- !query 2 schema
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING):bigint>
-- !query 2 output
1 a 2
1 a 4
1 b 3
2 a 4
2 b 6
3 NULL 3
3 b 6
NULL NULL 3
NULL a 1


-- !query 3
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData
-- !query 3 schema
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN 1 PRECEDING AND CURRENT ROW):bigint>
-- !query 3 output
1 a 2
1 a 2
1 b 1
2 a 3
2 b 2
3 NULL 1
3 b 2
NULL NULL 0
NULL a 0


-- !query 4
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData
-- !query 4 schema
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint>
-- !query 4 output
1 a 4
1 a 4
1 b 3
2 a 2
2 b 5
3 NULL 3
3 b 3
NULL NULL NULL
NULL a NULL


-- !query 5
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData
-- !query 5 schema
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint>
-- !query 5 output
1 a 2
1 a 2
1 b 1
2 a 4
2 b 3
3 NULL 3
3 b 5
NULL NULL NULL
NULL a NULL


-- !query 6
SELECT val, cate,
max(val) OVER(PARTITION BY cate ORDER BY val),
min(val) OVER(PARTITION BY cate ORDER BY val),
mean(val) OVER(PARTITION BY cate ORDER BY val),
count(val) OVER(PARTITION BY cate ORDER BY val),
sum(val) OVER(PARTITION BY cate ORDER BY val),
avg(val) OVER(PARTITION BY cate ORDER BY val),
stddev(val) OVER(PARTITION BY cate ORDER BY val),
first_value(val) OVER(PARTITION BY cate ORDER BY val),
first_value(val, true) OVER(PARTITION BY cate ORDER BY val),
first_value(val, false) OVER(PARTITION BY cate ORDER BY val),
last_value(val) OVER(PARTITION BY cate ORDER BY val),
last_value(val, true) OVER(PARTITION BY cate ORDER BY val),
last_value(val, false) OVER(PARTITION BY cate ORDER BY val),
rank() OVER(PARTITION BY cate ORDER BY val),
dense_rank() OVER(PARTITION BY cate ORDER BY val),
cume_dist() OVER(PARTITION BY cate ORDER BY val),
percent_rank() OVER(PARTITION BY cate ORDER BY val),
ntile(2) OVER(PARTITION BY cate ORDER BY val),
row_number() OVER(PARTITION BY cate ORDER BY val),
var_pop(val) OVER(PARTITION BY cate ORDER BY val),
var_samp(val) OVER(PARTITION BY cate ORDER BY val),
approx_count_distinct(val) OVER(PARTITION BY cate ORDER BY val)
FROM testData
-- !query 6 schema
struct<val:int,cate:string,max(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,min(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,avg(CAST(val AS BIGINT)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,sum(CAST(val AS BIGINT)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint,avg(CAST(val AS BIGINT)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,stddev_samp(CAST(val AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,first(val, false) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,first(val, true) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,first(val, false) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,last(val, false) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,last(val, true) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,last(val, false) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,RANK() OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,DENSE_RANK() OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,cume_dist() OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,PERCENT_RANK() OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,ntile(2) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,row_number() OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int,var_pop(CAST(val AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,var_samp(CAST(val AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double,approx_count_distinct(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
-- !query 6 output
1 a 1 1 1.0 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1
1 a 1 1 1.0 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1
1 b 1 1 1.0 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1
2 a 2 1 1.3333333333333333 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2
2 b 2 1 1.5 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2
3 NULL 3 3 3.0 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1
3 b 3 1 2.0 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3
NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0
NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0


-- !query 7
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData
-- !query 7 schema
struct<val:int,cate:string,avg(CAST(NULL AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double>
-- !query 7 output
1 a NULL
1 a NULL
1 b NULL
2 a NULL
2 b NULL
3 NULL NULL
3 b NULL
NULL NULL NULL
NULL a NULL


-- !query 8
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData
-- !query 8 schema
struct<>
-- !query 8 output
org.apache.spark.sql.AnalysisException
Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table;


-- !query 9
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData
-- !query 9 schema
struct<val:int,cate:string,sum(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,avg(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double>
-- !query 9 output
1 a 13 1.8571428571428572
1 a 13 1.8571428571428572
1 b 13 1.8571428571428572
2 a 13 1.8571428571428572
2 b 13 1.8571428571428572
3 NULL 13 1.8571428571428572
3 b 13 1.8571428571428572
NULL NULL 13 1.8571428571428572
NULL a 13 1.8571428571428572

0 comments on commit 2e3836c

Please sign in to comment.