-
Notifications
You must be signed in to change notification settings - Fork 0
/
Covid_Project_Data_Exploration.sql
132 lines (117 loc) · 4.9 KB
/
Covid_Project_Data_Exploration.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
COVID 19 Data Exploration
Skills used: Joins, CTE, Temp Table, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
*/
-- ANALYSIS PER LOCATION
-- 1. Have an overall view of the dataset
SELECT *
FROM PortfolioProject..CovidDeaths
ORDER BY location, date;
SELECT *
FROM PortfolioProject..CovidVaccinations
ORDER BY location, date;
-- 2. Select key variables in the dataset
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM PortfolioProject..CovidDeaths
ORDER BY location, date;
-- 3. Calculate the COVID death rate
SELECT location, date, total_cases, total_deaths,
(CONVERT(FLOAT, total_deaths)/NULLIF(CONVERT(FLOAT, total_cases), 0))*100 AS death_rate
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
ORDER BY location, date;
-- 4. Calculate the COVID infection rate by population
SELECT location, date, population, total_cases,
(CONVERT(FLOAT, total_cases)/NULLIF(CONVERT(FLOAT, population), 0))*100 AS infection_rate
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
ORDER BY location, date;
-- 5. Calculate the highest infection count and the highest infection rate by population
SELECT location, population,
MAX(CONVERT (FLOAT, total_cases)) AS highest_infection_count,
MAX((CONVERT(FLOAT, total_cases)/NULLIF(CONVERT(FLOAT, population), 0)))*100 AS highest_infection_rate
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY location, population
ORDER BY highest_infection_rate DESC;
-- 6. Calculate the highest death count and the highest death rate by population
SELECT location,
MAX(CONVERT (FLOAT, total_deaths)) AS highest_death_count,
MAX((CONVERT(FLOAT, total_deaths)/NULLIF(CONVERT(FLOAT, population), 0)))*100 AS highest_death_rate
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY location
ORDER BY highest_death_rate DESC;
-- ANALYSIS PER CONTINENT
-- 7. Calculate the highest infection count and rate and the highest death count and rate in population
SELECT continent,
MAX(CONVERT(FLOAT, total_cases)) AS highest_infection_count,
MAX(CONVERT(FLOAT, total_deaths)) AS highest_death_count,
MAX((CONVERT(FLOAT, total_cases)/NULLIF(CONVERT(FLOAT, population), 0)))*100 AS highest_infection_rate,
MAX((CONVERT(FLOAT, total_deaths)/NULLIF(CONVERT(FLOAT, population), 0)))*100 AS highest_death_rate
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY highest_death_rate DESC;
-- ANALYSIS PER DAY
-- 8. Calculate the total new cases, the total new deaths and the death percentage
SELECT date,
SUM(CONVERT(FLOAT, new_cases)) AS total_new_cases,
SUM(CONVERT(FLOAT, new_deaths)) AS total_new_deaths,
(SUM(CONVERT(FLOAT, new_deaths))/SUM(NULLIF(CONVERT(FLOAT, new_cases),0)))*100 AS death_perc
FROM PortfolioProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY date
ORDER BY date;
-- ANALYSIS OVER VACCINATIONS
-- 9. Calculate the rolling count of new vaccinations
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(FLOAT,vac.new_vaccinations)) OVER (PARTITION BY dea.location ORDER BY dea.location,dea.date) AS rolling_vaccinations
FROM PortfolioProject..CovidDeaths AS dea
JOIN PortfolioProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY dea.location, dea.date;
-- 10. Calculate the rolling vaccinations and the vaccination rate in population
-- Use CTE
WITH PopvsVac
AS
(
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(FLOAT,vac.new_vaccinations)) OVER (PARTITION BY dea.location ORDER BY dea.location,dea.date) AS rolling_vaccinations
FROM PortfolioProject..CovidDeaths AS dea
JOIN PortfolioProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
)
SELECT *, (CONVERT(FLOAT, rolling_vaccinations)/NULLIF((CONVERT(FLOAT, population)), 0))*100 AS vac_rate
FROM PopvsVac;
-- 11. Create TEMP TABLE
DROP TABLE IF EXISTS #Population_Vaccinated_Rate;
CREATE TABLE #Population_Vaccinated_Rate (
continent NVARCHAR(255),
location NVARCHAR(255),
date NVARCHAR(255),
population FLOAT,
new_vaccinations FLOAT,
rolling_vaccinations FLOAT
);
-- Insert into TEMP TABLE
INSERT INTO #Population_Vaccinated_Rate
SELECT dea.continent,
dea.location,
dea.date,
dea.population,
vac.new_vaccinations,
SUM(CONVERT(FLOAT, vac.new_vaccinations)) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS rolling_vaccinations
FROM PortfolioProject..CovidDeaths AS dea
JOIN PortfolioProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL;
-- Calculate vaccination rate in population
SELECT *,
(CONVERT(FLOAT, rolling_vaccinations) / NULLIF((CONVERT(FLOAT, population)), 0)) * 100 AS vac_rate
FROM #Population_Vaccinated_Rate;