/
scalar-subquery-predicate.sql
281 lines (255 loc) · 9.18 KB
/
scalar-subquery-predicate.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
-- A test suite for scalar subquery in predicate context
-- Fixture view p(pk, pv): a single row (1, 1), read by the SPARK-18814 queries.
CREATE OR REPLACE TEMPORARY VIEW p AS
SELECT
    pk,
    pv
FROM VALUES (1, 1) AS t(pk, pv);
-- Fixture view c(ck, cv): a single row (1, 1), read by the SPARK-18814 queries.
CREATE OR REPLACE TEMPORARY VIEW c AS
SELECT
    ck,
    cv
FROM VALUES (1, 1) AS t(ck, cv);
-- SPARK-18814.1: Simplified version of TPCDS-Q32
-- Joins p to c on the key and keeps the rows whose c.cv equals the average cv
-- of the c rows matching the outer p.pk (correlated scalar subquery).
SELECT
    p.pk,
    c.cv
FROM p, c
WHERE p.pk = c.ck
    AND c.cv = (
        SELECT avg(c1.cv)
        FROM c AS c1
        WHERE c1.ck = p.pk
    );
-- SPARK-18814.2: Adding stack of aggregates
-- Same outer query as SPARK-18814.1, but the scalar subquery takes max() over a
-- nested, grouped avg() whose WHERE clause is correlated to the outer p.pk.
SELECT
    p.pk,
    c.cv
FROM p, c
WHERE p.pk = c.ck
    AND c.cv = (
        SELECT max(avg)
        FROM (
            SELECT
                c1.cv,
                avg(c1.cv) AS avg
            FROM c AS c1
            WHERE c1.ck = p.pk
            GROUP BY c1.cv
        )
    );
-- Fixture view t1 (12 rows), read by the Group 1 and Group 2 test cases.
-- Columns, by literal form: t1a string, t1b smallint (S), t1c int, t1d bigint (L),
-- t1e float, t1f double (D), t1g decimal (BD), t1h timestamp, t1i date.
-- t1b, t1c and t1i contain NULLs so the predicates also exercise NULL handling.
-- OR REPLACE matches the p/c fixtures and keeps the script re-runnable in one session.
CREATE OR REPLACE TEMPORARY VIEW t1 AS
SELECT * FROM VALUES
    ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'),
    ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
    ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
    ('val1d', null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null),
    ('val1d', null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null),
    ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
    ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
    ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
    ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
AS t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
-- Fixture view t2 (13 rows), read by the Group 1 and Group 2 test cases.
-- Columns, by literal form: t2a string, t2b smallint (S), t2c int, t2d bigint (L),
-- t2e float, t2f double (D), t2g decimal (BD), t2h timestamp, t2i date.
-- t2b, t2c and t2i contain NULLs so the predicates also exercise NULL handling.
-- OR REPLACE matches the p/c fixtures and keeps the script re-runnable in one session.
CREATE OR REPLACE TEMPORARY VIEW t2 AS
SELECT * FROM VALUES
    ('val2a', 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
    ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ('val1b', 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
    ('val1c', 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
    ('val1b', null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null),
    ('val2e', 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    ('val1f', 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
    ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
    ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
    ('val1c', 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
    ('val1e', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
    ('val1f', 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
    ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null)
AS t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
-- Fixture view t3 (12 rows), read by the Group 1 test cases.
-- Columns, by literal form: t3a string, t3b smallint (S), t3c int, t3d bigint (L),
-- t3e float, t3f double (D), t3g decimal (BD), t3h timestamp, t3i date.
-- t3b, t3c and t3i contain NULLs so the predicates also exercise NULL handling.
-- OR REPLACE matches the p/c fixtures and keeps the script re-runnable in one session.
CREATE OR REPLACE TEMPORARY VIEW t3 AS
SELECT * FROM VALUES
    ('val3a', 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
    ('val3a', 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ('val1b', 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ('val1b', 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
    ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
    ('val3c', 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
    ('val3c', 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
    ('val1b', null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null),
    ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null),
    ('val3b', 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
    ('val3b', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
AS t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
-- Group 1: scalar subquery in predicate context
-- no correlation
-- TC 01.01
-- Single uncorrelated scalar subquery: keep t1 rows whose t1c equals max(t2c).
SELECT
    t1a,
    t1b
FROM t1
WHERE t1c = (
    SELECT max(t2c)
    FROM t2
);
-- TC 01.02
-- Two uncorrelated scalar subqueries (over t2 and t3) combined with AND.
SELECT
    t1a,
    t1d,
    t1f
FROM t1
WHERE t1c = (
    SELECT max(t2c)
    FROM t2
)
    AND t1b > (
        SELECT min(t3b)
        FROM t3
    );
-- TC 01.03
-- Two uncorrelated scalar subqueries combined with OR; the second one carries
-- its own WHERE filter (t3b > 10).
SELECT
    t1a,
    t1h
FROM t1
WHERE t1c = (
    SELECT max(t2c)
    FROM t2
)
    OR t1b = (
        SELECT min(t3b)
        FROM t3
        WHERE t3b > 10
    );
-- TC 01.04
-- scalar subquery over outer join:
-- the WHERE predicate on t1b filters the result of t1 LEFT JOIN t2.
SELECT
    t1a,
    t1b,
    t2d
FROM t1
LEFT JOIN t2
    ON t1a = t2a
WHERE t1b = (
    SELECT min(t3b)
    FROM t3
);
-- TC 01.05
-- test casting: the integer expression t1c + 5 is compared with max(t2e),
-- where t2e is built from float(...) values in the t2 fixture.
SELECT
    t1a,
    t1b,
    t1g
FROM t1
WHERE t1c + 5 = (
    SELECT max(t2e)
    FROM t2
);
-- TC 01.06
-- test casting: the timestamp column t1h is converted with date() before being
-- compared with min(t2i), a date column in the t2 fixture.
SELECT
    t1a,
    t1h
FROM t1
WHERE date(t1h) = (
    SELECT min(t2i)
    FROM t2
);
-- TC 01.07
-- same table, expressions in scalar subquery:
-- the outer query and the subquery both join t1 with t2 on t1b = t2b, and both
-- sides of the comparison are arithmetic expressions. The subquery's FROM clause
-- brings its own t1/t2 into scope, so it is not correlated with the outer query.
SELECT
    t2.t2d,
    t1.t1a
FROM t1, t2
WHERE t1.t1b = t2.t2b
    AND t2.t2c + 1 = (
        SELECT max(t2.t2c) + 1
        FROM t2, t1
        WHERE t2.t2b = t1.t1b
    );
-- TC 01.08
-- same table:
-- a derived table aliased t1 (max(t1g) per t1a) is joined to t2, and its
-- max_t1g is compared with the overall max(t1g) read from the t1 view.
SELECT DISTINCT
    t2a,
    max_t1g
FROM t2, (
    SELECT
        max(t1g) AS max_t1g,
        t1a
    FROM t1
    GROUP BY t1a
) AS t1
WHERE t2a = t1a
    AND max_t1g = (
        SELECT max(t1g)
        FROM t1
    );
-- TC 01.09
-- more than one scalar subquery:
-- both operands of the >= comparison are uncorrelated scalar subqueries over t3;
-- the result is AND-ed with a NULL check on the outer row.
SELECT
    t3b,
    t3c
FROM t3
WHERE (
    SELECT max(t3c)
    FROM t3
    WHERE t3b > 10
) >= (
    SELECT min(t3b)
    FROM t3
    WHERE t3c > 0
)
    AND (t3b IS NULL OR t3c IS NULL);
-- Group 2: scalar subquery in predicate context
-- with correlation
-- TC 02.01
-- Correlated scalar subquery with GROUP BY: t1a must sort below the max(t2a)
-- of the t2 rows whose t2c matches the outer t1c.
SELECT t1a
FROM t1
WHERE t1a < (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.02
-- Correlated scalar subquery tested with IS NULL: keeps the t1 rows for which
-- the subquery yields NULL.
SELECT
    t1a,
    t1c
FROM t1
WHERE (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
) IS NULL;
-- TC 02.03
-- Correlated scalar subquery with GROUP BY and HAVING count(*) >= 0, OR-ed with
-- a date filter on the outer row.
-- The cutoff is a typed DATE literal (as in the fixture rows) so t1i is compared
-- as a date instead of relying on implicit string/date coercion.
SELECT t1a
FROM t1
WHERE t1a = (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
    HAVING count(*) >= 0
)
    OR t1i > DATE '2014-12-31';
-- TC 02.03.01
-- Variant of TC 02.03 with HAVING count(*) >= 1 in the correlated subquery.
-- The cutoff is a typed DATE literal (as in the fixture rows) so t1i is compared
-- as a date instead of relying on implicit string/date coercion.
SELECT t1a
FROM t1
WHERE t1a = (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
    HAVING count(*) >= 1
)
    OR t1i > DATE '2014-12-31';
-- TC 02.04
-- t1 is the null-supplying side of the RIGHT JOIN. The WHERE predicate on t1a
-- rejects the NULL-extended rows, so the join can be reduced to an inner join.
SELECT count(t1a)
FROM t1
RIGHT JOIN t2
    ON t1d = t2d
WHERE t1a < (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.05
-- Two correlated scalar subqueries combined with AND: t1b must lie between
-- min(t2b) and max(t2b) of the t2 rows whose t2c matches the outer t1c.
SELECT t1a
FROM t1
WHERE t1b <= (
    SELECT max(t2b)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
)
    AND t1b >= (
        SELECT min(t2b)
        FROM t2
        WHERE t2.t2c = t1.t1c
        GROUP BY t2.t2c
    );
-- TC 02.06
-- set op: INTERSECT of two branches, each filtered by a correlated scalar
-- subquery (max(t2a) in the first branch, min(t2a) in the second).
SELECT t1a
FROM t1
WHERE t1a <= (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
)
INTERSECT
SELECT t1a
FROM t1
WHERE t1a >= (
    SELECT min(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.07.01
-- set op: UNION ALL of two branches, each filtered by a correlated scalar
-- subquery (max(t2a) in the first branch, min(t2a) in the second).
SELECT t1a
FROM t1
WHERE t1a <= (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
)
UNION ALL
SELECT t1a
FROM t1
WHERE t1a >= (
    SELECT min(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.07.02
-- set op: UNION DISTINCT of two branches, each filtered by a correlated scalar
-- subquery (max(t2a) in the first branch, min(t2a) in the second).
SELECT t1a
FROM t1
WHERE t1a <= (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
)
UNION DISTINCT
SELECT t1a
FROM t1
WHERE t1a >= (
    SELECT min(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.08
-- set op: MINUS of two branches, each filtered by a correlated scalar
-- subquery (max(t2a) in the first branch, min(t2a) in the second).
SELECT t1a
FROM t1
WHERE t1a <= (
    SELECT max(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
)
MINUS
SELECT t1a
FROM t1
WHERE t1a >= (
    SELECT min(t2a)
    FROM t2
    WHERE t2.t2c = t1.t1c
    GROUP BY t2.t2c
);
-- TC 02.09
-- in HAVING clause:
-- the correlated scalar subquery filters groups rather than rows; it correlates
-- on t1c, which is a grouping column but is not in the select list.
SELECT t1a
FROM t1
GROUP BY
    t1a,
    t1c
HAVING max(t1b) <= (
    SELECT max(t2b)
    FROM t2
    WHERE t2c = t1c
    GROUP BY t2c
);