# Covid-19 Global Analysis

This notebook explores analysis and visualization of Covid-19 data.

In [17]:
import pandas as pd
import numpy as np
import os
import altair as alt
import seaborn as sns
import matplotlib as plt

In [86]:
basedir = os.path.dirname(os.path.abspath(os.path.dirname("..")))
datadir = os.path.join(basedir, 'data')

In [127]:
# Import covid 19 Geographic Worldwide Distribution Data Set
cov_global_fp = os.path.join(datadir, 'COVID-19-geographic-disbtribution-worldwide.csv')

In [128]:
cov_global = pd.read_csv(cov_global_fp).rename(columns={'Countries and territories': 'country'})

In [129]:
cov_global.head(10)

Unnamed: 0,DateRep,Day,Month,Year,Cases,Deaths,country,GeoId
0,3/21/20,21,3,2020,2,0,Afghanistan,AF
1,3/20/20,20,3,2020,0,0,Afghanistan,AF
2,3/19/20,19,3,2020,0,0,Afghanistan,AF
3,3/18/20,18,3,2020,1,0,Afghanistan,AF
4,3/17/20,17,3,2020,5,0,Afghanistan,AF
5,3/16/20,16,3,2020,6,0,Afghanistan,AF
6,3/15/20,15,3,2020,3,0,Afghanistan,AF
7,3/11/20,11,3,2020,3,0,Afghanistan,AF
8,3/8/20,8,3,2020,3,0,Afghanistan,AF
9,3/2/20,2,3,2020,0,0,Afghanistan,AF


## Covid Statistics By Country

In [130]:
group_by_country = cov_global.groupby(by=['country'])

In [131]:
totals_by_country = group_by_country.sum()
totals_by_country = totals_by_country.reset_index()
totals_by_country = totals_by_country.drop(columns=['Day', 'Month', 'Year'])

In [132]:
totals_by_country['prct_deaths_per_case'] = (totals_by_country['Deaths'] / totals_by_country['Cases']) * 100

### Top 10 Countries with Coronavirus Deaths

In [133]:
top_deaths_country = totals_by_country.sort_values(by=['Deaths'], ascending=False).head(10)

In [134]:
alt.renderers.enable('default')
top_deaths_chart = alt.Chart(top_deaths_country).mark_bar().encode(
    x='country',
    y='Deaths'
)
top_deaths_chart

### Top 10 Countries with Corona Virus Cases

In [141]:
top_cases_country = totals_by_country.sort_values(by=['prct_deaths_per_case'], ascending=False).head(20)
top_cases_country

Unnamed: 0,country,Cases,Deaths,prct_deaths_per_case
155,Sudan,2,1,50.0
31,Cayman_Islands,3,1,33.333333
60,Gabon,3,1,33.333333
72,Guyana,5,1,20.0
165,Ukraine,26,3,11.538462
2,Algeria,94,10,10.638298
142,San_Marino,151,14,9.271523
85,Italy,47021,4032,8.574892
79,Indonesia,309,25,8.090615
133,Philippines,230,18,7.826087


In [142]:
alt.renderers.enable('default')
top_cases_chart = alt.Chart(top_cases_country).mark_bar().encode(
    x='country',
    y='Cases'
)
top_cases_chart

In [140]:
top_deaths_prct_country = totals_by_country.sort_values(by=['Cases'], ascending=False).head(20)
top_deaths_prct_country

Unnamed: 0,country,Cases,Deaths,prct_deaths_per_case
35,China,81416,3261,4.005355
85,Italy,47021,4032,8.574892
153,Spain,19980,1002,5.015015
80,Iran,19644,1433,7.294848
169,United_States_of_America,19624,260,1.324908
63,Germany,18323,45,0.245593
58,France,12612,450,3.56803
152,South_Korea,8799,103,1.170588
158,Switzerland,4840,43,0.88843
167,United_Kingdom,3983,177,4.443887


In [144]:
alt.renderers.enable('default')
top_cases_chart = alt.Chart(top_deaths_prct_country).mark_bar().encode(
    x='country',
    y='prct_deaths_per_case'
)
top_cases_chart

## Population Data

In [145]:
population_fp = os.path.join(datadir, 'population_by_year.csv')

In [146]:
population = pd.read_csv(population_fp).rename(columns={'Country Name': 'country'})

In [147]:
population.head()

Unnamed: 0,country,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Unnamed: 64
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54211.0,55438.0,56225.0,56695.0,57032.0,57360.0,...,102046.0,102560.0,103159.0,103774.0,104341.0,104872.0,105366.0,105845.0,,
1,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8996973.0,9169410.0,9351441.0,9543205.0,9744781.0,9956320.0,...,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,,
2,Angola,AGO,"Population, total",SP.POP.TOTL,5454933.0,5531472.0,5608539.0,5679458.0,5735044.0,5770570.0,...,24220661.0,25107931.0,26015780.0,26941779.0,27884381.0,28842484.0,29816748.0,30809762.0,,
3,Albania,ALB,"Population, total",SP.POP.TOTL,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,...,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,,
4,Andorra,AND,"Population, total",SP.POP.TOTL,13411.0,14375.0,15370.0,16412.0,17469.0,18549.0,...,83747.0,82427.0,80774.0,79213.0,78011.0,77297.0,77001.0,77006.0,,


In [148]:
"""
Determine whether there is any reported 2019 population data 
 [x] True: No data has been reported for all countries and territories for 2019 year
 [ ]False: Some regions reported population data for 2019 year
""" 
population['2019'].isna().sum() == len(population['2019'])

True

In [149]:
# Use most current population year - 2018
population = population[['country', '2018']]
population

Unnamed: 0,country,2018
0,Aruba,105845.0
1,Afghanistan,37172386.0
2,Angola,30809762.0
3,Albania,2866376.0
4,Andorra,77006.0
...,...,...
259,Kosovo,1845300.0
260,"Yemen, Rep.",28498687.0
261,South Africa,57779622.0
262,Zambia,17351822.0


In [150]:
population_cov = pd.merge(totals_by_country, population, how='left', on=['country']).rename(columns={'2018': 'population'})
population_cov.sample(10)

Unnamed: 0,country,Cases,Deaths,prct_deaths_per_case,population
6,Armenia,136,0,0.0,2951776.0
140,Saint_Lucia,2,0,0.0,
40,Croatia,126,1,0.793651,4089400.0
120,New_Zealand,53,0,0.0,
52,Estonia,283,0,0.0,1320884.0
30,Cases_on_an_international_conveyance_Japan,696,7,1.005747,
50,El_Salvador,3,0,0.0,
100,Luxembourg,484,5,1.033058,607728.0
5,Argentina,158,3,1.898734,44494502.0
129,Panama,200,1,0.5,4176873.0


## Covid-19 Statistics With Population Data

In [152]:
population_cov['prct_cases'] = (population_cov['Cases']/ population_cov['population'] * 100)

In [153]:
population_cov

Unnamed: 0,country,Cases,Deaths,prct_deaths_per_case,population,prct_cases
0,Afghanistan,24,0,0.000000,37172386.0,0.000065
1,Albania,70,2,2.857143,2866376.0,0.002442
2,Algeria,94,10,10.638298,42228429.0,0.000223
3,Andorra,75,0,0.000000,77006.0,0.097395
4,Antigua_and_Barbuda,1,0,0.000000,,
...,...,...,...,...,...,...
171,Uzbekistan,33,0,0.000000,32955400.0,0.000100
172,Venezuela,36,0,0.000000,,
173,Vietnam,87,0,0.000000,95540395.0,0.000091
174,Zambia,2,0,0.000000,17351822.0,0.000012


In [154]:
top_case_prct_country = population_cov.sort_values(by=['prct_cases'], ascending=False).head(20)
top_case_prct_country

Unnamed: 0,country,Cases,Deaths,prct_deaths_per_case,population,prct_cases
77,Iceland,409,1,0.244499,353574.0,0.115676
3,Andorra,75,0,0.0,77006.0,0.097395
98,Liechtenstein,34,0,0.0,37910.0,0.089686
100,Luxembourg,484,5,1.033058,607728.0,0.079641
85,Italy,47021,4032,8.574892,60431283.0,0.077809
158,Switzerland,4840,43,0.88843,8516543.0,0.056831
153,Spain,19980,1002,5.015015,46723749.0,0.042762
125,Norway,1742,7,0.401837,5314336.0,0.032779
109,Monaco,12,0,0.0,38682.0,0.031022
8,Austria,2649,6,0.226501,8847037.0,0.029942


In [155]:
alt.renderers.enable('default')
top_cases_chart = alt.Chart(top_case_prct_country).mark_bar().encode(
    x='country',
    y='prct_cases'
)
top_cases_chart