-
Notifications
You must be signed in to change notification settings - Fork 0
/
COVID_Queries.sql
155 lines (146 loc) · 4.62 KB
/
COVID_Queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
-- Get the worst day of the covid total cases
SELECT
t1.location AS location,
MAX(t1.date) AS date,
MAX(t1.new_cases) AS new_cases
FROM covid_data AS t1
INNER JOIN (
SELECT
location,
MAX(new_cases) AS new_cases
FROM covid_data
WHERE new_cases IS NOT null AND continent IS NOT null
GROUP BY location
) AS t2
ON t1.location = t2.location AND t1.new_cases = t2.new_cases
WHERE t1.continent IS NOT null
GROUP BY t1.location
ORDER BY new_cases DESC
-- Get the worst day of the covid total cases with CTE
--- Define the CTE expression name and column list
WITH newCases_country (location, date, new_cases)
AS
--- Define the CTE query
(
SELECT
t1.location AS location,
MAX(t1.date) AS date,
MAX(t1.new_cases) AS new_cases
FROM covid_data AS t1
INNER JOIN (
SELECT
location,
MAX(new_cases) AS new_cases
FROM covid_data
WHERE new_cases IS NOT null AND continent IS NOT null
GROUP BY location
) AS t2
ON t1.location = t2.location AND t1.new_cases = t2.new_cases
WHERE t1.continent IS NOT null
GROUP BY t1.location
)
--- Define the outer query referencing the CTE name
SELECT *
FROM newCases_country
-- Get the worst day of the covid new cases measured by new cases per million
SELECT
t1.location AS location,
MAX(t1.date) AS date,
MAX(ROUND(t1.new_cases / population * 1000000)) AS new_cases
FROM covid_data AS t1
INNER JOIN (
SELECT
location,
MAX(ROUND(new_cases / population * 1000000)) AS new_cases
FROM covid_data
WHERE new_cases IS NOT null AND continent IS NOT null
GROUP BY location
) AS t2
ON t1.location = t2.location AND t1.new_cases = t2.new_cases
WHERE t1.continent IS NOT null
GROUP BY t1.location
ORDER BY new_cases DESC
-- Get the worst day of the covid in deaths
SELECT
t1.location,
t1.date,
t1.new_deaths
FROM covid_data AS t1
INNER JOIN (
SELECT
location,
MAX(new_deaths) AS new_deaths
FROM covid_data
WHERE new_deaths IS NOT null
GROUP BY location
) AS t2
ON t1.location = t2.location AND t1.new_deaths = t2.new_deaths
WHERE t1.continent IS NOT null
ORDER BY new_deaths DESC
-- Ranking of countries by percentage of population vaccinated
SELECT
location,
MAX(people_fully_vaccinated) AS people_fully_vaccinated,
MAX(people_fully_vaccinated) / MAX(population) * 100 AS percentage_fully_vaccinated
FROM covid_data
WHERE continent IS NOT null AND people_fully_vaccinated IS NOT null
AND population IS NOT null
GROUP BY location
ORDER BY percentage_fully_vaccinated DESC
-- Percentage of people who died and the country life expectancy
SELECT
location,
MAX(life_expectancy) AS life_expectancy,
SUM(new_deaths) / MAX(population) * 100 AS percentage_died
FROM covid_data
WHERE continent IS NOT null AND life_expectancy IS NOT null AND population IS NOT null AND new_deaths IS NOT null
GROUP BY location
ORDER BY percentage_died DESC
-- Create a view of the last query to use in future analysis
CREATE VIEW mortalityRate_lifeExpectancy AS
SELECT
location,
MAX(life_expectancy) AS life_expectancy,
SUM(new_deaths) / MAX(population) * 100 AS percentage_died
FROM covid_data
WHERE continent IS NOT null AND life_expectancy IS NOT null AND population IS NOT null AND new_deaths IS NOT null
GROUP BY location
ORDER BY percentage_died DESC
-- Top days where new cases were reported (Worldwide)
SELECT
date,
SUM(new_cases) AS new_cases
FROM covid_data
WHERE continent IS NOT null AND new_cases IS NOT null
GROUP BY date
ORDER BY new_cases DESC
-- Get correlation coefficient between percentage of smoking population and deaths
SELECT CORR(smokers, deaths_per_100000)
FROM
(SELECT
location,
(MAX(female_smokers) + MAX(male_smokers))/2 AS smokers,
MAX(total_deaths) / MAX(population) * 100000 AS deaths_per_100000
FROM covid_data
WHERE male_smokers IS NOT null AND continent IS NOT null AND total_deaths IS NOT null
GROUP BY location) AS t
-- Get correlation coefficient between percentage of smoking population and deaths without a builtin function
SELECT
((tot_sum - (smokers_sum * deaths_per_100000_sum / _count)) / SQRT((smokers_sum_sq - pow(smokers_sum, 2.0) / _count) * (deaths_per_100000_sum_sq - pow(deaths_per_100000_sum, 2.0) / _count))) AS pearson_corr
FROM(
SELECT
SUM(smokers) AS smokers_sum,
SUM(deaths_per_100000) AS deaths_per_100000_sum,
SUM(smokers * smokers) AS smokers_sum_sq,
SUM(deaths_per_100000 * deaths_per_100000) AS deaths_per_100000_sum_sq,
SUM(smokers * deaths_per_100000) AS tot_sum,
COUNT(*) AS _count
FROM
(SELECT
location,
(MAX(female_smokers) + MAX(male_smokers))/2 AS smokers,
MAX(total_deaths) / MAX(population) * 100000 AS deaths_per_100000
FROM covid_data
WHERE male_smokers IS NOT null AND continent IS NOT null AND total_deaths IS NOT null
GROUP BY location) AS t1
) AS t2