diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
new file mode 100644
index 0000000000000..205c7d391a973
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql
@@ -0,0 +1,451 @@
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+-- Window Functions Testing
+-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L564-L911
+
+CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1;
+
+CREATE TABLE empsalary (
+  depname string,
+  empno integer,
+  salary int,
+  enroll_date date
+) USING parquet;
+
+INSERT INTO empsalary VALUES
+  ('develop', 10, 5200, date '2007-08-01'),
+  ('sales', 1, 5000, date '2006-10-01'),
+  ('personnel', 5, 3500, date '2007-12-10'),
+  ('sales', 4, 4800, date '2007-08-08'),
+  ('personnel', 2, 3900, date '2006-12-23'),
+  ('develop', 7, 4200, date '2008-01-01'),
+  ('develop', 9, 4500, date '2008-01-01'),
+  ('sales', 3, 4800, date '2007-08-01'),
+  ('develop', 8, 6000, date '2006-10-01'),
+  ('develop', 11, 5200, date '2007-08-15');
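+
+-- For illustration (not part of the ported PostgreSQL suite): plain ROWS frames
+-- over this data already work in Spark; a minimal sketch of a per-department
+-- running total would be
+--   select depname, empno, salary,
+--     sum(salary) over (partition by depname order by salary, empno
+--       rows between unbounded preceding and current row)
+--   from empsalary;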
+
+-- Test in_range for other datetime datatypes
+
+-- Spark only supports timestamp
+-- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp
+create table datetimes (
+  id int,
+  f_time timestamp,
+  f_timetz timestamp,
+  f_interval timestamp,
+  f_timestamptz timestamp,
+  f_timestamp timestamp
+) using parquet;
+
+-- Spark cannot safely cast StringType to TimestampType
+-- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp
+insert into datetimes values
+(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'),
+(2, timestamp '12:00', cast ('12:00 BST' as timestamp), cast ('2 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'),
+(3, timestamp '13:00', cast ('13:00 BST' as timestamp), cast ('3 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'),
+(4, timestamp '14:00', cast ('14:00 BST' as timestamp), cast ('4 years' as timestamp), cast ('2002-10-19 10:23:54+01' as timestamp), timestamp '2002-10-19 10:23:54'),
+(5, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2003-10-19 10:23:54+01' as timestamp), timestamp '2003-10-19 10:23:54'),
+(6, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2004-10-19 10:23:54+01' as timestamp), timestamp '2004-10-19 10:23:54'),
+(7, timestamp '17:00', cast ('17:00 BST' as timestamp), cast ('7 years' as timestamp), cast ('2005-10-19 10:23:54+01' as timestamp), timestamp '2005-10-19 10:23:54'),
+(8, timestamp '18:00', cast ('18:00 BST' as timestamp), cast ('8 years' as timestamp), cast ('2006-10-19 10:23:54+01' as timestamp), timestamp '2006-10-19 10:23:54'),
+(9, timestamp '19:00', cast ('19:00 BST' as timestamp), cast ('9 years' as timestamp), cast ('2007-10-19 10:23:54+01' as timestamp), timestamp '2007-10-19 10:23:54'),
+(10, timestamp '20:00', cast ('20:00 BST' as timestamp), cast ('10 years' as timestamp), cast ('2008-10-19 10:23:54+01' as timestamp), timestamp '2008-10-19 10:23:54');
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_time, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_time range between
+-- '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_time, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_time desc range between
+-- '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timetz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timetz range between
+-- '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timetz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timetz desc range between
+-- '70 min' preceding and '2 hours' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_interval, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_interval range between
+-- '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_interval, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_interval desc range between
+-- '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timestamptz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamptz range between
+-- '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timestamptz, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamptz desc range between
+-- '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timestamp, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamp range between
+-- '1 year' preceding and '1 year' following);
+
+-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp
+-- select id, f_timestamp, first(id) over w, last(id) over w
+-- from datetimes
+-- window w as (order by f_timestamp desc range between
+-- '1 year' preceding and '1 year' following);
+
+-- RANGE offset PRECEDING/FOLLOWING error cases
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by enroll_date, salary range between '1 year' preceding and '2 years' following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (range between '1 year' preceding and '2 years' following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select sum(salary) over (order by depname range between '1 year' preceding and '2 years' following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by enroll_date range between 1 preceding and 2 following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between -1 preceding and 2 following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between 1 preceding and -2 following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by salary range between '1 year' preceding and '2 years' following
+-- exclude ties), salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select max(enroll_date) over (order by enroll_date range between '1 year' preceding and '-2 years' following
+-- exclude ties), salary, enroll_date from empsalary;
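+
+-- For reference (not part of the ported file): SQL:2016 frame exclusion accepts
+-- EXCLUDE CURRENT ROW | GROUP | TIES | NO OTHERS, where EXCLUDE TIES keeps the
+-- current row but drops its ORDER BY peers from the frame. Spark's parser does
+-- not accept this syntax yet, which is why the queries above stay commented out.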
+
+-- GROUPS tests
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between unbounded preceding and current row),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between unbounded preceding and unbounded following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between current row and unbounded following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between 1 preceding and unbounded following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between 1 following and unbounded following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between unbounded preceding and 2 following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 preceding),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (order by four groups between 0 preceding and 0 following),
+-- unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 following
+-- exclude current row), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 following
+-- exclude group), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 following
+-- exclude ties), unique1, four
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (partition by ten
+-- order by four groups between 0 preceding and 0 following),unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (partition by ten
+-- order by four groups between 0 preceding and 0 following exclude current row), unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (partition by ten
+-- order by four groups between 0 preceding and 0 following exclude group), unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- SELECT sum(unique1) over (partition by ten
+-- order by four groups between 0 preceding and 0 following exclude ties), unique1, four, ten
+-- FROM tenk1 WHERE unique1 < 10;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select first_value(salary) over(order by enroll_date groups between 1 preceding and 1 following),
+-- lead(salary) over(order by enroll_date groups between 1 preceding and 1 following),
+-- nth_value(salary, 1) over(order by enroll_date groups between 1 preceding and 1 following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28508] Support for range frame+row frame in the same query
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select last(salary) over(order by enroll_date groups between 1 preceding and 1 following),
+-- lag(salary) over(order by enroll_date groups between 1 preceding and 1 following),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- select first_value(salary) over(order by enroll_date groups between 1 following and 3 following
+-- exclude current row),
+-- lead(salary) over(order by enroll_date groups between 1 following and 3 following exclude ties),
+-- nth_value(salary, 1) over(order by enroll_date groups between 1 following and 3 following
+-- exclude ties),
+-- salary, enroll_date from empsalary;
+
+-- [SPARK-28428] Spark `exclude` always expecting `()`
+-- select last(salary) over(order by enroll_date groups between 1 following and 3 following
+-- exclude group),
+-- lag(salary) over(order by enroll_date groups between 1 following and 3 following exclude group),
+-- salary, enroll_date from empsalary;
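+
+-- For reference (not part of the ported file): in GROUPS mode the frame offset
+-- counts peer groups of the ORDER BY key rather than physical rows or key-value
+-- distance, so `groups between 1 preceding and 1 following` covers every row
+-- tied on the previous, current, and next distinct value of `four`.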
+
+-- Show differences in offset interpretation between ROWS, RANGE, and GROUPS
+WITH cte (x) AS (
+  SELECT * FROM range(1, 36, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+  SELECT * FROM range(1, 36, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- WITH cte (x) AS (
+-- SELECT * FROM range(1, 36, 2)
+-- )
+-- SELECT x, (sum(x) over w)
+-- FROM cte
+-- WINDOW w AS (ORDER BY x groups between 1 preceding and 1 following);
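+
+-- Worked example (editorial, not in the original file): for x = 5 above,
+-- ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING sums the physical neighbours
+-- 3 + 5 + 7 = 15, while RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING only admits
+-- values in [4, 6], so the sum collapses to 5 itself; GROUPS would step over
+-- distinct values of x instead, which matters once duplicates appear, as below.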
+
+WITH cte (x) AS (
+  select 1 union all select 1 union all select 1 union all
+  SELECT * FROM range(5, 50, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following);
+
+WITH cte (x) AS (
+  select 1 union all select 1 union all select 1 union all
+  SELECT * FROM range(5, 50, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following);
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- WITH cte (x) AS (
+-- select 1 union all select 1 union all select 1 union all
+-- SELECT * FROM range(5, 50, 2)
+-- )
+-- SELECT x, (sum(x) over w)
+-- FROM cte
+-- WINDOW w AS (ORDER BY x groups between 1 preceding and 1 following);
+
+-- with UNION
+SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0;
+
+-- check some degenerate cases
+create table t1 (f1 int, f2 int) using parquet;
+insert into t1 values (1,1),(1,2),(2,2);
+
+select f1, sum(f1) over (partition by f1
+  range between 1 preceding and 1 following)
+from t1 where f1 = f2; -- error, must have order by
+
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- explain
+-- select f1, sum(f1) over (partition by f1 order by f2
+-- range between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1 order by f2
+range between 1 preceding and 1 following)
+from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1, f1 order by f2
+range between 2 preceding and 1 preceding)
+from t1 where f1 = f2;
+
+select f1, sum(f1) over (partition by f1, f2 order by f2
+range between 1 following and 2 following)
+from t1 where f1 = f2;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select f1, sum(f1) over (partition by f1,
+-- groups between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- explain
+-- select f1, sum(f1) over (partition by f1 order by f2
+-- range between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select f1, sum(f1) over (partition by f1 order by f2
+-- groups between 1 preceding and 1 following)
+-- from t1 where f1 = f2;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select f1, sum(f1) over (partition by f1, f1 order by f2
+-- groups between 2 preceding and 1 preceding)
+-- from t1 where f1 = f2;
+
+-- [SPARK-28648] Adds support to `groups` unit type in window clauses
+-- select f1, sum(f1) over (partition by f1, f2 order by f2
+-- groups between 1 following and 2 following)
+-- from t1 where f1 = f2;
+
+-- ordering by a non-integer constant is allowed
+SELECT rank() OVER (ORDER BY length('abc'));
+
+-- can't order by another window function
+-- [SPARK-28566] window functions should not be allowed in window definitions
+-- SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random()));
+
+-- some other errors
+SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10;
+
+SELECT * FROM empsalary INNER JOIN tenk1 ON row_number() OVER (ORDER BY salary) < 10;
+
+SELECT rank() OVER (ORDER BY 1), count(*) FROM empsalary GROUP BY 1;
+
+SELECT * FROM rank() OVER (ORDER BY random());
+
+-- Original query: DELETE FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10;
+SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10;
+
+-- Original query: DELETE FROM empsalary RETURNING rank() OVER (ORDER BY random());
+SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random());
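+
+-- For reference (not part of the ported file): a window function result can
+-- only be filtered from an enclosing query block, e.g.
+--   SELECT * FROM (
+--     SELECT *, row_number() OVER (ORDER BY salary) AS rn FROM empsalary
+--   ) WHERE rn < 10;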
+
+-- [SPARK-28645] Throw an error on window redefinition
+-- select count(*) OVER w FROM tenk1 WINDOW w AS (ORDER BY unique1), w AS (ORDER BY unique1);
+
+select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1;
+
+-- [SPARK-28646] Allow usage of `count` only for parameterless aggregate function
+-- select count() OVER () FROM tenk1;
+
+-- The output below is the expected one: `range` is not a window or aggregate function.
+SELECT range(1, 100) OVER () FROM empsalary;
+
+SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1;
+
+-- [SPARK-27951] ANSI SQL: NTH_VALUE function
+-- SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1;
+
+-- filter
+
+-- [SPARK-28500] Adds support for `filter` clause
+-- SELECT sum(salary), row_number() OVER (ORDER BY depname), sum(
+-- sum(salary) FILTER (WHERE enroll_date > '2007-01-01')
+-- )
+-- FROM empsalary GROUP BY depname;
+
+-- Test pushdown of quals into a subquery containing window functions
+
+-- pushdown is safe because all PARTITION BY clauses include depname:
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- EXPLAIN
+-- SELECT * FROM
+-- (SELECT depname,
+-- sum(salary) OVER (PARTITION BY depname) depsalary,
+-- min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary
+-- FROM empsalary) emp
+-- WHERE depname = 'sales';
+
+-- pushdown is unsafe because there's a PARTITION BY clause without depname:
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- EXPLAIN
+-- SELECT * FROM
+-- (SELECT depname,
+-- sum(salary) OVER (PARTITION BY enroll_date) enroll_salary,
+-- min(salary) OVER (PARTITION BY depname) depminsalary
+-- FROM empsalary) emp
+-- WHERE depname = 'sales';
+
+-- Test Sort node collapsing
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- EXPLAIN
+-- SELECT * FROM
+-- (SELECT depname,
+-- sum(salary) OVER (PARTITION BY depname order by empno) depsalary,
+-- min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary
+-- FROM empsalary) emp
+-- WHERE depname = 'sales';
+
+-- Test Sort node reordering
+-- Since EXPLAIN output depends on host-specific physical file locations, it is commented out
+-- EXPLAIN
+-- SELECT
+-- lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date),
+-- lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno)
+-- FROM empsalary;
+
+-- cleanup
+DROP TABLE empsalary;
+DROP TABLE datetimes;
+DROP TABLE t1;
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out
new file mode 100644
index 0000000000000..80e4385ebd84b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out
@@ -0,0 +1,409 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 29
+
+
+-- !query 0
+CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE TABLE empsalary (
+  depname string,
+  empno integer,
+  salary int,
+  enroll_date date
+) USING parquet
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+INSERT INTO empsalary VALUES
+  ('develop', 10, 5200, date '2007-08-01'),
+  ('sales', 1, 5000, date '2006-10-01'),
+  ('personnel', 5, 3500, date '2007-12-10'),
+  ('sales', 4, 4800, date '2007-08-08'),
+  ('personnel', 2, 3900, date '2006-12-23'),
+  ('develop', 7, 4200, date '2008-01-01'),
+  ('develop', 9, 4500, date '2008-01-01'),
+  ('sales', 3, 4800, date '2007-08-01'),
+  ('develop', 8, 6000, date '2006-10-01'),
+  ('develop', 11, 5200, date '2007-08-15')
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+create table datetimes (
+  id int,
+  f_time timestamp,
+  f_timetz timestamp,
+  f_interval timestamp,
+  f_timestamptz timestamp,
+  f_timestamp timestamp
+) using parquet
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+insert into datetimes values
+(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'),
+(2, timestamp '12:00', cast ('12:00 BST' as timestamp), cast ('2 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'),
+(3, timestamp '13:00', cast ('13:00 BST' as timestamp), cast ('3 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'),
+(4, timestamp '14:00', cast ('14:00 BST' as timestamp), cast ('4 years' as timestamp), cast ('2002-10-19 10:23:54+01' as timestamp), timestamp '2002-10-19 10:23:54'),
+(5, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2003-10-19 10:23:54+01' as timestamp), timestamp '2003-10-19 10:23:54'),
+(6, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2004-10-19 10:23:54+01' as timestamp), timestamp '2004-10-19 10:23:54'),
+(7, timestamp '17:00', cast ('17:00 BST' as timestamp), cast ('7 years' as timestamp), cast ('2005-10-19 10:23:54+01' as timestamp), timestamp '2005-10-19 10:23:54'),
+(8, timestamp '18:00', cast ('18:00 BST' as timestamp), cast ('8 years' as timestamp), cast ('2006-10-19 10:23:54+01' as timestamp), timestamp '2006-10-19 10:23:54'),
+(9, timestamp '19:00', cast ('19:00 BST' as timestamp), cast ('9 years' as timestamp), cast ('2007-10-19 10:23:54+01' as timestamp), timestamp '2007-10-19 10:23:54'),
+(10, timestamp '20:00', cast ('20:00 BST' as timestamp), cast ('10 years' as timestamp), cast ('2008-10-19 10:23:54+01' as timestamp), timestamp '2008-10-19 10:23:54')
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+WITH cte (x) AS (
+  SELECT * FROM range(1, 36, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following)
+-- !query 5 schema
+struct
+-- !query 5 output
+1 4
+11 33
+13 39
+15 45
+17 51
+19 57
+21 63
+23 69
+25 75
+27 81
+29 87
+3 9
+31 93
+33 99
+35 68
+5 15
+7 21
+9 27
+
+
+-- !query 6
+WITH cte (x) AS (
+  SELECT * FROM range(1, 36, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following)
+-- !query 6 schema
+struct
+-- !query 6 output
+1 1
+11 11
+13 13
+15 15
+17 17
+19 19
+21 21
+23 23
+25 25
+27 27
+29 29
+3 3
+31 31
+33 33
+35 35
+5 5
+7 7
+9 9
+
+
+-- !query 7
+WITH cte (x) AS (
+  select 1 union all select 1 union all select 1 union all
+  SELECT * FROM range(5, 50, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following)
+-- !query 7 schema
+struct
+-- !query 7 output
+1 2
+1 3
+1 7
+11 33
+13 39
+15 45
+17 51
+19 57
+21 63
+23 69
+25 75
+27 81
+29 87
+31 93
+33 99
+35 105
+37 111
+39 117
+41 123
+43 129
+45 135
+47 141
+49 96
+5 13
+7 21
+9 27
+
+
+-- !query 8
+WITH cte (x) AS (
+  select 1 union all select 1 union all select 1 union all
+  SELECT * FROM range(5, 50, 2)
+)
+SELECT x, (sum(x) over w)
+FROM cte
+WINDOW w AS (ORDER BY x range between 1 preceding and 1 following)
+-- !query 8 schema
+struct
+-- !query 8 output
+1 3
+1 3
+1 3
+11 11
+13 13
+15 15
+17 17
+19 19
+21 21
+23 23
+25 25
+27 27
+29 29
+31 31
+33 33
+35 35
+37 37
+39 39
+41 41
+43 43
+45 45
+47 47
+49 49
+5 5
+7 7
+9 9
+
+
+-- !query 9
+SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0
+-- !query 9 schema
+struct
+-- !query 9 output
+
+
+
+-- !query 10
+create table t1 (f1 int, f2 int) using parquet
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+insert into t1 values (1,1),(1,2),(2,2)
+-- !query 11 schema
+struct<>
+-- !query 11 output
+
+
+
+-- !query 12
+select f1, sum(f1) over (partition by f1
+  range between 1 preceding and 1 following)
+from t1 where f1 = f2
+-- !query 12 schema
+struct<>
+-- !query 12 output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(PARTITION BY default.t1.`f1` RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING)' due to data type mismatch: A range window frame cannot be used in an unordered window specification.; line 1 pos 24
+
+
+-- !query 13
+select f1, sum(f1) over (partition by f1 order by f2
+range between 1 preceding and 1 following)
+from t1 where f1 = f2
+-- !query 13 schema
+struct
+-- !query 13 output
+1 1
+2 2
+
+
+-- !query 14
+select f1, sum(f1) over (partition by f1, f1 order by f2
+range between 2 preceding and 1 preceding)
+from t1 where f1 = f2
+-- !query 14 schema
+struct
+-- !query 14 output
+1 NULL
+2 NULL
+
+
+-- !query 15
+select f1, sum(f1) over (partition by f1, f2 order by f2
+range between 1 following and 2 following)
+from t1 where f1 = f2
+-- !query 15 schema
+struct
+-- !query 15 output
+1 NULL
+2 NULL
+
+
+-- !query 16
+SELECT rank() OVER (ORDER BY length('abc'))
+-- !query 16 schema
+struct
+-- !query 16 output
+1
+
+
+-- !query 17
+SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10
+-- !query 17 schema
+struct<>
+-- !query 17 output
+org.apache.spark.sql.AnalysisException
+It is not allowed to use window functions inside WHERE and HAVING clauses;
+
+
+-- !query 18
+SELECT * FROM empsalary INNER JOIN tenk1 ON row_number() OVER (ORDER BY salary) < 10
+-- !query 18 schema
+struct<>
+-- !query 18 output
+org.apache.spark.sql.AnalysisException
+
+The query operator `Join` contains one or more unsupported
+expression types Aggregate, Window or Generate.
+Invalid expressions: [row_number() OVER (ORDER BY default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)];
+
+
+-- !query 19
+SELECT rank() OVER (ORDER BY 1), count(*) FROM empsalary GROUP BY 1
+-- !query 19 schema
+struct<>
+-- !query 19 output
+org.apache.spark.sql.AnalysisException
+
+The query operator `Aggregate` contains one or more unsupported
+expression types Aggregate, Window or Generate.
+Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)];
+
+
+-- !query 20
+SELECT * FROM rank() OVER (ORDER BY random())
+-- !query 20 schema
+struct<>
+-- !query 20 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+no viable alternative at input 'ORDER'(line 1, pos 27)
+
+== SQL ==
+SELECT * FROM rank() OVER (ORDER BY random())
+---------------------------^^^
+
+
+-- !query 21
+SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10
+-- !query 21 schema
+struct<>
+-- !query 21 output
+org.apache.spark.sql.AnalysisException
+It is not allowed to use window functions inside WHERE and HAVING clauses;
+
+
+-- !query 22
+SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random())
+-- !query 22 schema
+struct<>
+-- !query 22 output
+org.apache.spark.sql.AnalysisException
+It is not allowed to use window functions inside WHERE and HAVING clauses;
+
+
+-- !query 23
+select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1
+-- !query 23 schema
+struct<>
+-- !query 23 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+no viable alternative at input 'ORDER'(line 1, pos 39)
+
+== SQL ==
+select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1
+---------------------------------------^^^
+
+
+-- !query 24
+SELECT range(1, 100) OVER () FROM empsalary
+-- !query 24 schema
+struct<>
+-- !query 24 output
+org.apache.spark.sql.AnalysisException
+Undefined function: 'range'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7
+
+
+-- !query 25
+SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1
+-- !query 25 schema
+struct<>
+-- !query 25 output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'ntile(0)' due to data type mismatch: Buckets expression must be positive, but got: 0; line 1 pos 7
+
+
+-- !query 26
+DROP TABLE empsalary
+-- !query 26 schema
+struct<>
+-- !query 26 output
+
+
+
+-- !query 27
+DROP TABLE datetimes
+-- !query 27 schema
+struct<>
+-- !query 27 output
+
+
+
+-- !query 28
+DROP TABLE t1
+-- !query 28 schema
+struct<>
+-- !query 28 output
+