sql/core/src/test/resources/sql-tests/results/group-by.sql.out

-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 64


-- !query
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b)
-- !query schema
struct<>
-- !query output


-- !query
SELECT a, COUNT(b) FROM testData
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
grouping expressions sequence is empty, and 'testdata.a' is not an aggregate function. Wrap '(count(testdata.b) AS `count(b)`)' in windowing function(s) or wrap 'testdata.a' in first() (or first_value) if you don't care which value you get.


-- !query
SELECT COUNT(a), COUNT(b) FROM testData
-- !query schema
struct<count(a):bigint,count(b):bigint>
-- !query output
7	7


-- !query
SELECT a, COUNT(b) FROM testData GROUP BY a
-- !query schema
struct<a:int,count(b):bigint>
-- !query output
1	2
2	2
3	2
NULL	1


-- !query
SELECT a, COUNT(b) FROM testData GROUP BY b
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.


-- !query
SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a
-- !query schema
struct<count(a):bigint,count(b):bigint>
-- !query output
0	1
2	2
2	2
3	2


-- !query
SELECT 'foo', COUNT(a) FROM testData GROUP BY 1
-- !query schema
struct<foo:string,count(a):bigint>
-- !query output
foo	7


-- !query
SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1
-- !query schema
struct<foo:string>
-- !query output


-- !query
SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1
-- !query schema
struct<foo:string,approx_count_distinct(a):bigint>
-- !query output


-- !query
SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1
-- !query schema
struct<foo:string,max(struct(a)):struct<a:int>>
-- !query output


-- !query
SELECT a + b, COUNT(b) FROM testData GROUP BY a + b
-- !query schema
struct<(a + b):int,count(b):bigint>
-- !query output
2	1
3	2
4	2
5	1
NULL	1


-- !query
SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdata.a' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.


-- !query
SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1
-- !query schema
struct<((a + 1) + 1):int,count(b):bigint>
-- !query output
3	2
4	2
5	2
NULL	1


-- !query
SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a)
FROM testData
-- !query schema
struct<skewness(a):double,kurtosis(a):double,min(a):int,max(a):int,avg(a):double,variance(a):double,stddev(a):double,sum(a):bigint,count(a):bigint>
-- !query output
-0.2723801058145729	-1.5069204152249134	1	3	2.142857142857143	0.8095238095238094	0.8997354108424372	15	7


-- !query
SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a
-- !query schema
struct<count(DISTINCT b):bigint,count(DISTINCT b, c):bigint>
-- !query output
1	1


-- !query
SELECT a AS k, COUNT(b) FROM testData GROUP BY k
-- !query schema
struct<k:int,count(b):bigint>
-- !query output
1	2
2	2
3	2
NULL	1


-- !query
SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1
-- !query schema
struct<k:int,count(b):bigint>
-- !query output
2	2
3	2


-- !query
SELECT COUNT(b) AS k FROM testData GROUP BY k
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
aggregate functions are not allowed in GROUP BY, but found count(testdata.b)


-- !query
CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v)
-- !query schema
struct<>
-- !query output


-- !query
SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
expression 'testdatahassamenamewithalias.k' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.


-- !query
set spark.sql.groupByAliases=false
-- !query schema
struct<key:string,value:string>
-- !query output
spark.sql.groupByAliases	false


-- !query
SELECT a AS k, COUNT(b) FROM testData GROUP BY k
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'k' given input columns: [testdata.a, testdata.b]; line 1 pos 47


-- !query
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
-- !query schema
struct<a:int,count(1):bigint>
-- !query output


-- !query
SELECT COUNT(1) FROM testData WHERE false
-- !query schema
struct<count(1):bigint>
-- !query output
0


-- !query
SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t
-- !query schema
struct<1:int>
-- !query output
1


-- !query
SELECT 1 from (
  SELECT 1 AS z,
  MIN(a.x)
  FROM (select 1 as x) a
  WHERE false
) b
where b.z != b.z
-- !query schema
struct<1:int>
-- !query output


-- !query
SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
-- !query schema
struct<corr(DISTINCT x, y):double,corr(DISTINCT y, x):double,count(1):bigint>
-- !query output
1.0	1.0	3


-- !query
SELECT 1 FROM range(10) HAVING true
-- !query schema
struct<1:int>
-- !query output
1


-- !query
SELECT 1 FROM range(10) HAVING MAX(id) > 0
-- !query schema
struct<1:int>
-- !query output
1


-- !query
SELECT id FROM range(10) HAVING id > 0
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
grouping expressions sequence is empty, and 'id' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'id' in first() (or first_value) if you don't care which value you get.


-- !query
SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
-- !query schema
struct<key:string,value:string>
-- !query output
spark.sql.legacy.parser.havingWithoutGroupByAsWhere	true


-- !query
SELECT 1 FROM range(10) HAVING true
-- !query schema
struct<1:int>
-- !query output
1
1
1
1
1
1
1
1
1
1


-- !query
SELECT 1 FROM range(10) HAVING MAX(id) > 0
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException

Aggregate/Window/Generate expressions are not valid in where clause of the query.
Expression in where clause: [(max(id) > CAST(0 AS BIGINT))]
Invalid expressions: [max(id)]


-- !query
SELECT id FROM range(10) HAVING id > 0
-- !query schema
struct<id:bigint>
-- !query output
1
2
3
4
5
6
7
8
9


-- !query
SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
-- !query schema
struct<key:string,value:string>
-- !query output
spark.sql.legacy.parser.havingWithoutGroupByAsWhere	false


-- !query
CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
  (1, true), (1, false),
  (2, true),
  (3, false), (3, null),
  (4, null), (4, null),
  (5, null), (5, true), (5, false) AS test_agg(k, v)
-- !query schema
struct<>
-- !query output


-- !query
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0
-- !query schema
struct<every(v):boolean,some(v):boolean,any(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query output
NULL	NULL	NULL	NULL	NULL


-- !query
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4
-- !query schema
struct<every(v):boolean,some(v):boolean,any(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query output
NULL	NULL	NULL	NULL	NULL


-- !query
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5
-- !query schema
struct<every(v):boolean,some(v):boolean,any(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query output
false	true	true	false	true


-- !query
SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k
-- !query schema
struct<k:int,every(v):boolean,some(v):boolean,any(v):boolean,bool_and(v):boolean,bool_or(v):boolean>
-- !query output
1	false	true	true	false	true
2	true	true	true	true	true
3	false	false	false	false	false
4	NULL	NULL	NULL	NULL	NULL
5	false	true	true	false	true


-- !query
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false
-- !query schema
struct<k:int,every(v):boolean>
-- !query output
1	false
3	false
5	false


-- !query
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL
-- !query schema
struct<k:int,every(v):boolean>
-- !query output
4	NULL


-- !query
SELECT k,
       Every(v) AS every
FROM   test_agg
WHERE  k = 2
       AND v IN (SELECT Any(v)
                 FROM   test_agg
                 WHERE  k = 1)
GROUP  BY k
-- !query schema
struct<k:int,every:boolean>
-- !query output
2	true


-- !query
SELECT k,
       Every(v) AS every
FROM   test_agg
WHERE  k = 2
       AND v IN (SELECT Every(v)
                 FROM   test_agg
                 WHERE  k = 1)
GROUP  BY k
-- !query schema
struct<k:int,every:boolean>
-- !query output


-- !query
SELECT every(1)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'every(1)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7


-- !query
SELECT some(1S)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'some(1S)' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7


-- !query
SELECT any(1L)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'any(1L)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7


-- !query
SELECT every("true")
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 7


-- !query
SELECT bool_and(1.0)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7


-- !query
SELECT bool_or(1.0D)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7


-- !query
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query schema
struct<k:int,v:boolean,every(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query output
1	false	false
1	true	false
2	true	true
3	NULL	NULL
3	false	false
4	NULL	NULL
4	NULL	NULL
5	NULL	NULL
5	false	false
5	true	false


-- !query
SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query schema
struct<k:int,v:boolean,some(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query output
1	false	false
1	true	true
2	true	true
3	NULL	NULL
3	false	false
4	NULL	NULL
4	NULL	NULL
5	NULL	NULL
5	false	false
5	true	true


-- !query
SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query schema
struct<k:int,v:boolean,any(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query output
1	false	false
1	true	true
2	true	true
3	NULL	NULL
3	false	false
4	NULL	NULL
4	NULL	NULL
5	NULL	NULL
5	false	false
5	true	true


-- !query
SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query schema
struct<k:int,v:boolean,bool_and(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query output
1	false	false
1	true	false
2	true	true
3	NULL	NULL
3	false	false
4	NULL	NULL
4	NULL	NULL
5	NULL	NULL
5	false	false
5	true	false


-- !query
SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg
-- !query schema
struct<k:int,v:boolean,bool_or(v) OVER (PARTITION BY k ORDER BY v ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):boolean>
-- !query output
1	false	false
1	true	true
2	true	true
3	NULL	NULL
3	false	false
4	NULL	NULL
4	NULL	NULL
5	NULL	NULL
5	false	false
5	true	true


-- !query
SELECT count(*) FROM test_agg HAVING count(*) > 1L
-- !query schema
struct<count(1):bigint>
-- !query output
10


-- !query
SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true
-- !query schema
struct<k:int,max(v):boolean>
-- !query output
1	true
2	true
5	true


-- !query
SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L
-- !query schema
struct<cnt:bigint>
-- !query output
10


-- !query
SELECT count(*) FROM test_agg WHERE count(*) > 1L
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException

Aggregate/Window/Generate expressions are not valid in where clause of the query.
Expression in where clause: [(count(1) > 1L)]
Invalid expressions: [count(1)]


-- !query
SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException

Aggregate/Window/Generate expressions are not valid in where clause of the query.
Expression in where clause: [((count(1) + 1L) > 1L)]
Invalid expressions: [count(1)]


-- !query
SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException

Aggregate/Window/Generate expressions are not valid in where clause of the query.
Expression in where clause: [(((test_agg.k = 1) OR (test_agg.k = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.k) > 1)))]
Invalid expressions: [count(1), max(test_agg.k)]


-- !query
SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col)
-- !query schema
struct<avg(DISTINCT decimal_col):decimal(13,4),sum(DISTINCT decimal_col):decimal(19,0)>
-- !query output
1.0000	1


-- !query
SELECT not(a IS NULL), count(*) AS c
FROM testData
GROUP BY a IS NULL
-- !query schema
struct<(NOT (a IS NULL)):boolean,c:bigint>
-- !query output
false	2
true	7


-- !query
SELECT if(not(a IS NULL), rand(0), 1), count(*) AS c
FROM testData
GROUP BY a IS NULL
-- !query schema
struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
-- !query output
0.7604953758285915	7
1.0	2