In [1]:
# Covid Data Exploration 
# Link to Dataset: https://ourworldindata.org/covid-deaths

In [2]:
import sqlite3
import pandas as pd

In [3]:
df_deaths = pd.read_csv('data/CovidDeaths.csv')
df_deaths.head()

Unnamed: 0,iso_code,continent,location,date,population,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million
0,AFG,Asia,Afghanistan,2/24/2020,39835428.0,5.0,5.0,,,,...,,,,,,,,,,
1,AFG,Asia,Afghanistan,2/25/2020,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
2,AFG,Asia,Afghanistan,2/26/2020,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
3,AFG,Asia,Afghanistan,2/27/2020,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
4,AFG,Asia,Afghanistan,2/28/2020,39835428.0,5.0,0.0,,,,...,,,,,,,,,,


In [4]:
df_vaccinations = pd.read_csv('data/CovidVaccinations.csv')
df_vaccinations.head()

Unnamed: 0,iso_code,continent,location,date,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2/24/2020,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2/25/2020,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2/26/2020,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2/27/2020,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2/28/2020,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [5]:
df_deaths['date'] = pd.to_datetime(df_deaths['date'])
df_deaths['date'] = df_deaths['date'].dt.strftime('%Y-%m-%d')
df_deaths.head()

Unnamed: 0,iso_code,continent,location,date,population,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million
0,AFG,Asia,Afghanistan,2020-02-24,39835428.0,5.0,5.0,,,,...,,,,,,,,,,
1,AFG,Asia,Afghanistan,2020-02-25,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
2,AFG,Asia,Afghanistan,2020-02-26,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
3,AFG,Asia,Afghanistan,2020-02-27,39835428.0,5.0,0.0,,,,...,,,,,,,,,,
4,AFG,Asia,Afghanistan,2020-02-28,39835428.0,5.0,0.0,,,,...,,,,,,,,,,


In [6]:
df_vaccinations['date'] = pd.to_datetime(df_vaccinations['date'])
df_vaccinations['date'] = df_vaccinations['date'].dt.strftime('%Y-%m-%d')
df_vaccinations.head()

Unnamed: 0,iso_code,continent,location,date,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,,,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [7]:
con = sqlite3.connect("covid.db")

# drop data into database
df_deaths.to_sql("coviddeaths", con)
df_vaccinations.to_sql("covidvaccinations", con)

In [8]:
%load_ext sql

In [9]:
%sql sqlite:///covid.db

In [10]:
# -- Total Cases vs Total Deaths
# -- Shows likelihood of dying if you contract COVID in Germany

In [11]:
%%sql

SELECT 
    location
    , date
    , total_cases
    , total_deaths
    , (total_deaths/total_cases)*100 AS DeathPercentage
FROM coviddeaths
WHERE location = 'Germany' 
    AND continent IS not null
ORDER BY location, date
LIMIT 100

 * sqlite:///covid.db
Done.


location,date,total_cases,total_deaths,DeathPercentage
Germany,2020-01-27,1.0,,
Germany,2020-01-28,4.0,,
Germany,2020-01-29,4.0,,
Germany,2020-01-30,4.0,,
Germany,2020-01-31,5.0,,
Germany,2020-02-01,8.0,,
Germany,2020-02-02,10.0,,
Germany,2020-02-03,12.0,,
Germany,2020-02-04,12.0,,
Germany,2020-02-05,12.0,,


In [12]:
# -- Total Cases vs Population
# -- Shows what percentage of population in Germany is infected with Covid

In [13]:
%%sql

SELECT
    location
    , date
    , population
    , total_cases
    , (total_cases/population)*100 AS PercentPopulationInfected
FROM coviddeaths
WHERE location = 'Germany' 
    AND continent IS not null
ORDER BY location, date
LIMIT 100

 * sqlite:///covid.db
Done.


location,date,population,total_cases,PercentPopulationInfected
Germany,2020-01-27,83900471.0,1.0,1.1918884221758422e-06
Germany,2020-01-28,83900471.0,4.0,4.767553688703369e-06
Germany,2020-01-29,83900471.0,4.0,4.767553688703369e-06
Germany,2020-01-30,83900471.0,4.0,4.767553688703369e-06
Germany,2020-01-31,83900471.0,5.0,5.959442110879211e-06
Germany,2020-02-01,83900471.0,8.0,9.535107377406738e-06
Germany,2020-02-02,83900471.0,10.0,1.191888422175842e-05
Germany,2020-02-03,83900471.0,12.0,1.4302661066110104e-05
Germany,2020-02-04,83900471.0,12.0,1.4302661066110104e-05
Germany,2020-02-05,83900471.0,12.0,1.4302661066110104e-05


In [14]:
# -- Countries with Highest Infection Rate compared to Population

In [15]:
%%sql

SELECT
    location
    , population
    , MAX(total_cases) AS HighestInfectionCount
    , MAX((total_cases/population))*100 AS PercentPopulationInfected
FROM coviddeaths
WHERE continent IS not null
GROUP BY location, population
ORDER BY PercentPopulationInfected DESC

 * sqlite:///covid.db
Done.


location,population,HighestInfectionCount,PercentPopulationInfected
Andorra,77354.0,33025.0,42.6933319543915
Seychelles,98910.0,34367.0,34.74572843999596
Gibraltar,33691.0,11702.0,34.733311566887295
Montenegro,628051.0,212144.0,33.77814858984382
San Marino,34010.0,11466.0,33.713613643046166
Aruba,107195.0,32220.0,30.057372078921592
Slovenia,2078723.0,589458.0,28.35673632321382
Faeroe Islands,49053.0,13822.0,28.17768536073227
Slovakia,5449270.0,1454621.0,26.693869087052025
Cyprus,896005.0,237260.0,26.479762947751407


In [16]:
# -- Countries with the Highest Death Count

In [17]:
%%sql

SELECT 
    location
    , MAX(CAST(total_deaths AS int)) AS TotalDeathCount
FROM coviddeaths
WHERE continent IS not null
GROUP BY location
ORDER BY TotalDeathCount DESC

 * sqlite:///covid.db
Done.


location,TotalDeathCount
United States,865968.0
Brazil,623191.0
India,489409.0
Russia,318869.0
Mexico,302721.0
Peru,204141.0
United Kingdom,153913.0
Indonesia,144206.0
Italy,143296.0
Iran,132202.0


In [18]:
# -- BREAKING THINGS DOWN BY CONTINENT
# -- Showing continents with the highest death count

In [19]:
%%sql

SELECT 
    continent
    , MAX(CAST(total_deaths AS int)) AS TotalDeathCount
FROM coviddeaths
WHERE continent IS not null
GROUP BY continent
ORDER BY TotalDeathCount DESC

 * sqlite:///covid.db
Done.


continent,TotalDeathCount
North America,865968
South America,623191
Asia,489409
Europe,318869
Africa,94063
Oceania,3121


In [20]:
# -- GLOBAL NUMBERS
# -- Per date

In [21]:
%%sql

SELECT
    date
    , SUM(new_cases) AS total_cases
    , SUM(CAST(new_deaths AS int)) AS total_deaths
    , SUM(CAST(new_deaths AS int))/SUM(new_cases)*100 AS DeathPercentage
FROM coviddeaths
WHERE continent IS not null
GROUP BY date
ORDER BY date
LIMIT 100

 * sqlite:///covid.db
Done.


date,total_cases,total_deaths,DeathPercentage
2020-01-01,,,
2020-01-02,,,
2020-01-03,,,
2020-01-04,,,
2020-01-05,,,
2020-01-06,,,
2020-01-07,,,
2020-01-08,,,
2020-01-09,,,
2020-01-10,,,


In [22]:
# -- GLOBAL NUMBERS
# -- In total

In [23]:
%%sql

SELECT
    SUM(new_cases) AS total_cases
    , SUM(CAST(new_deaths AS int)) AS total_deaths
    , SUM(CAST(new_deaths AS int))/SUM(new_cases)*100 AS DeathPercentage
FROM coviddeaths
WHERE continent IS not null
ORDER BY 1,2

 * sqlite:///covid.db
Done.


total_cases,total_deaths,DeathPercentage
348560179.0,5566192,1.5969098983048202


In [24]:
# --Total Population vs Vaccination
# -- Shows Population that has recieved at least one Covid Vaccine in Germany

In [25]:
%%sql

SELECT
    dea.continent
    , dea.location
    , dea.date
    , dea.population
    , vac.new_vaccinations
    , SUM(vac.new_vaccinations) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS RollingPeopleVaccinated
FROM covidvaccinations vac
JOIN coviddeaths dea
    ON dea.location = vac.location 
    AND dea.date = vac.date
WHERE dea.continent IS not null 
    AND dea.location = 'Germany'
ORDER BY dea.location, dea.date
LIMIT 50 OFFSET 330

 * sqlite:///covid.db
Done.


continent,location,date,population,new_vaccinations,RollingPeopleVaccinated
Europe,Germany,2020-12-22,83900471.0,,
Europe,Germany,2020-12-23,83900471.0,,
Europe,Germany,2020-12-24,83900471.0,,
Europe,Germany,2020-12-25,83900471.0,,
Europe,Germany,2020-12-26,83900471.0,,
Europe,Germany,2020-12-27,83900471.0,,
Europe,Germany,2020-12-28,83900471.0,18096.0,18096.0
Europe,Germany,2020-12-29,83900471.0,50698.0,68794.0
Europe,Germany,2020-12-30,83900471.0,64195.0,132989.0
Europe,Germany,2020-12-31,83900471.0,50011.0,183000.0


In [26]:
# -- Using CTE to perform Calculation on Partition By in previous query

# -- Shows Percentage of Population that has recieved at least one Covid Vaccine in Germany

In [27]:
%%sql

WITH PopvsVac (continent, location, date, population, new_vaccinations, RollingPeopleVaccinated)
AS
(
    SELECT
        dea.continent
        , dea.location
        , dea.date
        , dea.population
        , vac.new_vaccinations
        , SUM(CAST(vac.new_vaccinations AS int)) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS RollingPeopleVaccinated
    FROM covidvaccinations vac
    JOIN coviddeaths dea
        ON dea.location = vac.location 
        AND dea.date = vac.date 
    WHERE dea.continent IS not null 
        AND dea.location = 'Germany'
)
SELECT *
    , (RollingPeopleVaccinated/Population)*100 AS PercentagePeopleVaccinated
FROM PopvsVac
LIMIT 50 OFFSET 330

 * sqlite:///covid.db
Done.


continent,location,date,population,new_vaccinations,RollingPeopleVaccinated,PercentagePeopleVaccinated
Europe,Germany,2020-12-22,83900471.0,,,
Europe,Germany,2020-12-23,83900471.0,,,
Europe,Germany,2020-12-24,83900471.0,,,
Europe,Germany,2020-12-25,83900471.0,,,
Europe,Germany,2020-12-26,83900471.0,,,
Europe,Germany,2020-12-27,83900471.0,,,
Europe,Germany,2020-12-28,83900471.0,18096.0,18096.0,0.021568412887694
Europe,Germany,2020-12-29,83900471.0,50698.0,68794.0,0.0819947721151648
Europe,Germany,2020-12-30,83900471.0,64195.0,132989.0,0.1585080493767431
Europe,Germany,2020-12-31,83900471.0,50011.0,183000.0,0.2181155812581791
