-
Notifications
You must be signed in to change notification settings - Fork 0
/
window-functions.sql
167 lines (155 loc) · 4.91 KB
/
window-functions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-- WINDOW/ANALYTIC FUCNTIONS
-- Calculate an aggregate value based on a group of rows
-- Unlike aggregate functions, analytic functions can return multiple rows for each group.
-- We can use window functions to compute aggregated values, moving averages, running totals, percentages or top-N results within a group and much, much more.
-- use the OVER() clause
-- To get the total region population for each country:
-- We will first get the total region population
select
a.name,
a.country_id,
total_pop_region,
a.region_id,
a.population,
round(a.population / b.total_pop_region * 100, 3) as pop_region_percentage
from eba_countries a
left join
(select region_id, sum(population) as total_pop_region
from eba_countries
group by region_id) b
on a.region_id = b.region_id
order by name;
-- The way with window functions is:
select
a.name,
a.country_id,
a.region_id,
a.population,
sum(population) over(partition by a.region_id order by a.region_id) as sum_pop_region,
round(population / (sum(population) over(partition by a.region_id order by a.region_id)) * 100, 3) as pop_region_percentage
from eba_countries a
order by country_id
-- FRAMES EXAMPLES
-- ROWS
-- ROWS 1 PRECEDING = previous row
select
sum(population) over (
partition by region_id
order by sub_region_id
ROWS UNBOUNDED PRECEDING AND 1 FOLLOWING-- This means that it will get the sum of all preceding rows + itself + 1 after itself(= current row)
)
-- RANGE
-- RANGE 1 PRECEDING = if we have subregion_id = 20, then 1 preceding is 19.
select
sum(population) over (
partition by region_id
order by sub_region_id
RANGE BETWEEN UNBOUNDED PRECEDING AND 10 FOLLOWING -- IT CONSIDERS a range of numerical values and compares them to the column value for the column that has been used in the ORDER BY clause. If you do not have anything in your ORDER BY clause, you cannot use range.
)
-- Difference between using order by and not using it:
select
name,
region_id,
sub_region_id,
population,
sum(population) over(partition by region_id order by sub_region_id) as a, -- by default, the range is between unbounded preceding and current row, where in the next window function, the rande covers all rows
sum(population) over(partition by region_id) as b
from eba_countries;
-- AGGREGATES:
-- SUM
-- AVG
-- COUNT
-- MIN
-- MAX
select
name,
region_id,
sum(population) over(partition by region_id),
max(population) over(partition by region_id),
min(population) over(partition by region_id),
count(population) over(partition by region_id),
avg(population) over()
from eba_countries;
-- LAG
select
name,
population,
lag(name) over(order by name),
lag(name, 2) over(order by name),
lag(name, 2, 'blank') over(order by name)
from eba_countries;
-- LEAD
select
name,
population,
lead(name) over(order by name),
lead(name, 2) over(order by name),
lead(name, 2, 'blank') over(order by name)
from eba_countries;
-- NTILE
select
country_id,
name,
population,
ntile(20) over(order by name),
ntile(3) over(order by population desc),
ntile(5) over(order by region_id) -- 5 groups evenly distributed for each region
from eba_countries;
-- NTH_VALUE
select
country_id,
name,
population,
nth_value(name, 2) over(),
nth_value(name, 2) over(partition by region_id)
from eba_countries;
-- ROW_NUMBER
-- RANK
-- DENSE_RANK
select
country_id,
name,
population,
row_number() over(order by population),
rank() over (order by population),
dense_rank() over(order by population)
from eba_countries;
-- NOTE:
select
country_id,
name,
population,
row_number() over(order by population),
rank() over (order by population),
dense_rank() over(order by population)
from eba_countries
order by country_id; -- DOES NOT CHANGE RESULT!! order by is executed last maybe? -> you need to check order in which clauses are executed
-- partitioning:
select
country_id,
name,
population,
region_id,
row_number() over(partition by region_id order by population),
rank() over (partition by region_id order by population),
dense_rank() over(order by population)
from eba_countries;
-- PERCENT_RANK
-- CUME_DIST
select
country_id,
name,
population,
percent_rank() over(partition by region_id order by population),
cume_dist() over(order by population)
from eba_countries;
-- ORDER OF EXECUTION OF SQL QUERIES:
-- 1. FROM
-- 2. JOIN
-- 3. WHERE
-- 4. GROUP BY
-- 5. HAVING
-- 6. WINDOW
-- 7. SELECT
-- 8. ORDER BY
-- 9. LIMIT / FETCH / TOP