-
Notifications
You must be signed in to change notification settings - Fork 0
/
Covid_Analysis
218 lines (171 loc) · 8.89 KB
/
Covid_Analysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
SELECT *
FROM Covid_Analysis_Project..CovidVaccinations
ORDER BY 3,4;
SELECT *
FROM Covid_Analysis_Project..CovidDeaths
--Noticed that the continent is null for some rows so I cleaned the data by filtering out the null values, using a where clause.
WHERE continent IS NOT NULL
ORDER BY 3,4;
-- Let's select the data that I will use, and order it by date and location (1st and 2nd column, respectively).
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM Covid_Analysis_Project..CovidDeaths
ORDER BY 1,2;
-- Looking at Total Cases vs Total Deaths (Percentage of people who have been infected and have died as a result of it).
SELECT location, date, total_cases, COALESCE(total_deaths,0) AS total_deaths, COALESCE((total_deaths/total_cases)*100,0) AS percentage_deaths
FROM Covid_Analysis_Project..CovidDeaths
ORDER BY 1,2;
SELECT location, date, total_cases, COALESCE(total_deaths,0) AS total_deaths, COALESCE((total_deaths/total_cases)*100,0) AS percentage_deaths
FROM Covid_Analysis_Project..CovidDeaths
WHERE location IN ('United Kingdom', 'Zimbabwe') AND date='2021-11-10'
ORDER BY 1,2;
/* As of the date of this analysis, 2021/11/10, the chances of dying from coronavirus in the UK are just a bit over 1.5% percent
while the probability jumps up to over 3.5% in the African country of Zimbabwe. This may be an indicator of a lower tier public health system,
with less capacity to fight over the disease*/
/*For other developed countries, like Spain, the probability is slightly higher than that of the UK,
with an infection fatality rate of 1.74%, as of 2021/11/10.*/
SELECT location, date, total_cases, COALESCE(total_deaths,0) AS total_deaths, COALESCE((total_deaths/total_cases)*100,0) AS percentage_deaths
FROM Covid_Analysis_Project..CovidDeaths
WHERE location ='Spain' AND date='2021-11-10'
ORDER BY 1,2;
--Looking now at the Total Cases vs Population for the United Kingdom, as of 2021/11/10.
SELECT location, date, total_cases, population, ROUND((total_cases/population)*100,2) AS percentage_infected_population
FROM Covid_Analysis_Project..CovidDeaths
WHERE location = 'United Kingdom' AND date='2021-11-10'
AND continent IS NOT NULL
ORDER BY 1,2;
-- Per the results, at the time of this analysis, 13.86% of the population has been infected in the UK.
-- Let's now find out the top 3 countries with the highest infection rates per capita.
SELECT TOP 3 location, population, MAX(total_cases) AS highest_infection_count, ROUND(MAX((total_cases/population))*100,2) AS max_percentage_infected_population
FROM Covid_Analysis_Project..CovidDeaths
GROUP BY location, population
ORDER BY max_percentage_infected_population DESC;
/*Montenegro has the highest percentage of infections, with 23.93% of its population having had coronavirus,
followed by Seychelles and Andorra, with 22.77% and 20.35%, respectively.*/
-- Let's identify the top 3 countries with the highest death count per capita.
SELECT TOP 3 location, population,
MAX(CAST(total_deaths as int)) AS highest_death_count,
--Had to cast the total_deaths from a nvarchar to an integer so the aggregation function would work correctly.
ROUND(MAX((CAST(total_deaths as int)/population))*100,2) AS death_count_per_capita
FROM Covid_Analysis_Project..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY location, population
ORDER BY death_count_per_capita DESC;
/*Peruvian people suffered the greatest impact, with a death count per capita of 0.6%.
Eastern European countries, Bulgaria, and Bosnia and Herzegovina, had 0.37% and 0.36% of its people die from covid.*/
-- Let's break it down by continent and query for top 3 continents with the highest death count.
SELECT top 3 continent,
MAX(CAST(total_deaths as int)) AS highest_death_count,
--Had to cast the total_deaths from a nvarchar to an integer so the aggregation function would work correctly.
ROUND(MAX((CAST(total_deaths as int)/population))*100,2) AS death_count_per_capita
FROM Covid_Analysis_Project..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY highest_death_count DESC;
/* North America has the highest death count, followed by South America and Asia. Quite alarming the that the US with only 7.6%
of the world population share is top in the list.*/
-- Let's now find out when the death count was greatest both globally and in the UK.
SELECT TOP 1 date,
SUM(new_cases) AS total_cases,
SUM(CAST(new_deaths as int)) AS total_deaths,
SUM(CAST(new_deaths as int))/SUM(new_cases)*100 AS percentage_death
FROM Covid_Analysis_Project..CovidDeaths
WHERE continent IS NOT NULL AND total_cases IS NOT NULL
GROUP BY date
ORDER BY 3 DESC;
SELECT TOP 1 date,
SUM(new_cases) AS total_cases,
SUM(CAST(new_deaths as int)) AS total_deaths,
SUM(CAST(new_deaths as int))/SUM(new_cases)*100 AS percentage_death
FROM Covid_Analysis_Project..CovidDeaths
WHERE continent IS NOT NULL AND total_cases IS NOT NULL AND location = 'United Kingdom'
GROUP BY date
ORDER BY 3 DESC;
-- The number of deaths attributed to the coronavirus was the highest in the world on January, 20 2021, with 18,007 deaths in a single day. Similarly, the UK had its greatest fatality count on the same day, with 1,826 deaths.
/*Now, let's join the CovidDeaths table to the CovidVaccinations table to look at
the percentage of people that have been vaccinated by country*/
--OPTION 1. Using a Common Table Expression (CTE)
WITH vaccinatined_population (
continent,
location,
date,
population,
new_vaccinations,
rolling_vaccination_count)
AS
(
SELECT cd.continent,
cd.location,
cd.date,
cd.population,
cv.new_vaccinations,
/*Let's look at the rolling vaccination count with a window function. It's partitioned by location as we
want the count to start over once a location changes. I ordered it by location and date to get the rolling count.*/
SUM(CONVERT(bigint, cv.new_vaccinations)) OVER (PARTITION BY cd.location ORDER BY cd.location, cd.date) AS rolling_vaccination_count
--(rolling_vaccination_count/population)*100 as percentage_of_people_vaccinated
FROM Covid_Analysis_Project..CovidDeaths AS cd
JOIN Covid_Analysis_Project..CovidVaccinations AS cv
ON cd.iso_code=cv.iso_code
AND cd.date=cv.date
WHERE cd.continent IS NOT NULL
--order by 2,3
)
--Let's identify the 3 countries with the lowest vaccination rate
SELECT TOP 3 *, (rolling_vaccination_count/population)*100 AS vaccination_rate
FROM vaccinatined_population
WHERE date = '2021/11/10' AND rolling_vaccination_count IS NOT NULL
ORDER BY vaccination_rate ASC;
--Low-income African countries, Algeria, Burundi and Cameroon, have the lowest vaccination rates in the world as of 2021/11/10.
--OPTION 2. Using a Temp Table
DROP TABLE if exists percent_population_vaccinated
CREATE TABLE percent_population_vaccinated
(
continent nvarchar(255),
location nvarchar(255),
date date,
population numeric,
new_vaccinations bigint,
rolling_vaccination_count numeric
)
INSERT INTO percent_population_vaccinated
SELECT cd.continent,
cd.location,
cd.date,
cd.population,
cv.new_vaccinations,
/*Let's look at the rolling vaccination count with a window function. It's partitioned by location as we
want the count to start over once a location changes and we ordered it by location and date to get the rolling count.*/
SUM(CONVERT(bigint, cv.new_vaccinations)) OVER(PARTITION BY cd.location ORDER BY cd.location, cd.date) AS rolling_vaccination_count
--(rolling_vaccination_count/population)*100 as percentage_of_people_vaccinated
FROM Covid_Analysis_Project..CovidDeaths AS cd
JOIN Covid_Analysis_Project..CovidVaccinations AS cv
ON cd.iso_code=cv.iso_code
AND cd.date=cv.date
WHERE cd.continent IS NOT NULL
--order by 2,3
SELECT TOP 3 *, (rolling_vaccination_count/population)*100 as vaccination_rate
FROM percent_population_vaccinated
WHERE date = '2021/11/10' AND rolling_vaccination_count IS NOT NULL
ORDER BY vaccination_rate ASC;
--The exact same result was obtained using this method.
--OPTION 3. Creating a View. It provides added security and the ability to store the data for later analyses and viz.
DROP VIEW IF EXISTS dbo.percentpopulationvaccinated
CREATE VIEW percentpopulationvaccinated AS
SELECT cd.continent,
cd.location,
cd.date,
cd.population,
cv.new_vaccinations,
/*Let's look at the rolling vaccination count with a window function. It's partitioned by location as we
want the count to start over once a location changes and we ordered it by location and date to get the rolling count.*/
SUM(CONVERT(bigint, cv.new_vaccinations)) OVER(PARTITION BY cd.location ORDER BY cd.location, cd.date) AS rolling_vaccination_count
FROM Covid_Analysis_Project..CovidDeaths AS cd
JOIN Covid_Analysis_Project..CovidVaccinations AS cv
ON cd.iso_code=cv.iso_code
AND cd.date=cv.date
WHERE cd.continent IS NOT NULL
--order by 2,3
-- Querying off of the newly created view.
SELECT TOP 3 *, (rolling_vaccination_count/population)*100 AS vaccination_rate
FROM percentpopulationvaccinated
WHERE date = '2021/11/10' AND rolling_vaccination_count IS NOT NULL
ORDER BY vaccination_rate ASC;