# Covid statistics by country
[Our World in Data source](https://github.com/owid/covid-19-data/tree/master/public/data)

[CDC United States data source](https://covid.cdc.gov/covid-data-tracker/#county-view)

In [19]:
import pandas as pd

# World other than United States

In [20]:
# Load the Our World in Data COVID-19 export and preview its first rows.
covid_world = pd.read_csv(
    filepath_or_buffer='/Users/fiona/pandas_tutorials/covid_cases/owid-covid-data.csv'
)
covid_world.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498


In [21]:
# Keep only the location, date, and case/death count columns.
cols_retain = [
    'location',
    'date',
    'total_cases',
    'new_cases',
    'total_deaths',
    'new_deaths',
]
# .copy() yields an independent frame, so later edits do not touch covid_world.
covid_revised = covid_world.loc[:, cols_retain].copy()
covid_revised.head(n=5)

Unnamed: 0,location,date,total_cases,new_cases,total_deaths,new_deaths
0,Afghanistan,2020-02-24,1.0,1.0,,
1,Afghanistan,2020-02-25,1.0,0.0,,
2,Afghanistan,2020-02-26,1.0,0.0,,
3,Afghanistan,2020-02-27,1.0,0.0,,
4,Afghanistan,2020-02-28,1.0,0.0,,


In [22]:
# Countries selected for comparison: France, Australia, Germany, Japan, Brazil.
# Regions of interest noted by the author: Germany (North Rhine-Westphalia),
# Japan (Tokyo), Brazil (Sao Paulo) - the US has more specific info.
# The frames below are filtered at country level only.

# One independent copy per country so each can be modified separately.
covid_france = covid_revised.loc[covid_revised['location'].eq('France')].copy()
covid_australia = covid_revised.loc[covid_revised['location'].eq('Australia')].copy()
covid_germany = covid_revised.loc[covid_revised['location'].eq('Germany')].copy()
covid_japan = covid_revised.loc[covid_revised['location'].eq('Japan')].copy()
covid_brazil = covid_revised.loc[covid_revised['location'].eq('Brazil')].copy()

covid_japan.head(n=5)

Unnamed: 0,location,date,total_cases,new_cases,total_deaths,new_deaths
25551,Japan,2020-01-22,2.0,,,
25552,Japan,2020-01-23,2.0,0.0,,
25553,Japan,2020-01-24,2.0,0.0,,
25554,Japan,2020-01-25,2.0,0.0,,
25555,Japan,2020-01-26,4.0,2.0,,


In [23]:
# Population of each country (head counts, digit-grouped for readability).
pop_fr = 65_273_511   # France
pop_au = 25_499_884   # Australia
pop_gr = 83_783_942   # Germany
pop_jp = 126_476_461  # Japan
pop_br = 212_559_417  # Brazil

In [24]:
# Daily and cumulative case/death rates per 100,000 inhabitants.

# The two lists are positionally aligned: one population per country frame.
country_dfs = [covid_france, covid_australia, covid_germany, covid_japan, covid_brazil]
country_pop = [pop_fr, pop_au, pop_gr, pop_jp, pop_br]

for df, pop in zip(country_dfs, country_pop):
    # pop / 100000 is the population expressed in units of 100k people.
    df['deaths_per_100k'] = df['new_deaths'].div(pop / 100000)
    df['cases_per_100k'] = df['new_cases'].div(pop / 100000)
    df['total_deaths_100k'] = df['total_deaths'].div(pop / 100000)
    df['total_cases_100k'] = df['total_cases'].div(pop / 100000)

covid_australia.head(n=5)


Unnamed: 0,location,date,total_cases,new_cases,total_deaths,new_deaths,deaths_per_100k,cases_per_100k,total_deaths_100k,total_cases_100k
2399,Australia,2020-01-26,4.0,4.0,,,,0.015686,,0.015686
2400,Australia,2020-01-27,5.0,1.0,,,,0.003922,,0.019608
2401,Australia,2020-01-28,5.0,0.0,,,,0.0,,0.019608
2402,Australia,2020-01-29,6.0,1.0,,,,0.003922,,0.02353
2403,Australia,2020-01-30,9.0,3.0,,,,0.011765,,0.035294


In [25]:
# Index every country frame by a parsed datetime instead of the raw date string.
for df in country_dfs:
    # pop() removes the string 'date' column and returns it for parsing.
    df['datetime'] = pd.to_datetime(df.pop('date'), format='%Y-%m-%d')
    df.set_index('datetime', inplace=True)

covid_germany.head(n=5)

Unnamed: 0_level_0,location,total_cases,new_cases,total_deaths,new_deaths,deaths_per_100k,cases_per_100k,total_deaths_100k,total_cases_100k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-27,Germany,1.0,1.0,,,,0.001194,,0.001194
2020-01-28,Germany,4.0,3.0,,,,0.003581,,0.004774
2020-01-29,Germany,4.0,0.0,,,,0.0,,0.004774
2020-01-30,Germany,4.0,0.0,,,,0.0,,0.004774
2020-01-31,Germany,5.0,1.0,,,,0.001194,,0.005968


# United States

In [26]:
# Daily case and death trend files for the US, Washington, North Dakota and
# South Dakota. skiprows=3 skips the first three lines of each file
# (presumably a preamble above the header row - confirm against the raw CSVs).
cases_us, cases_wa, cases_nd, cases_sd = (
    pd.read_csv(path, skiprows=3)
    for path in (
        '/Users/fiona/pandas_tutorials/covid_cases/case_daily_trends__united_states.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/case_daily_trends__washington.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/case_daily_trends__north_dakota.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/case_daily_trends__south_dakota.csv',
    )
)
death_sd, death_nd, death_wa, death_us = (
    pd.read_csv(path, skiprows=3)
    for path in (
        '/Users/fiona/pandas_tutorials/covid_cases/death_daily_trends__south_dakota.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/death_daily_trends__north_dakota.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/death_daily_trends__washington.csv',
        '/Users/fiona/pandas_tutorials/covid_cases/death_daily_trends__united_states.csv',
    )
)

In [27]:
# Populations used as denominators for the per-100k rates.
pop_wa = 7_797_100    # Washington State
pop_us = 331_796_277  # United States
# NOTE(review): fractional head count - confirm the source figure.
pop_nd = 803_686.2    # North Dakota
pop_sd = 858_469      # South Dakota

In [28]:
# Preview the last five rows of the Washington deaths table.
death_wa.tail(n=5)

Unnamed: 0,Date,New Deaths,7-Day Moving Avg
332,Jan 26 2020,0,0
333,Jan 25 2020,0,0
334,Jan 24 2020,0,0
335,Jan 23 2020,0,0
336,Jan 22 2020,0,0


# Data cleaning

In [29]:
# Normalise all eight frames: snake_case column names, a datetime index,
# and no raw date or moving-average columns.
dfs = [cases_us, cases_wa, cases_nd, cases_sd, death_sd, death_wa, death_nd, death_us]

for df in dfs:
    # e.g. 'New Deaths' -> 'new_deaths', '7-Day Moving Avg' -> '7-day_moving_avg'
    df.columns = [name.lower().replace(' ', '_') for name in df.columns]
    # pop() removes the string 'date' column and returns it for parsing.
    df['datetime'] = pd.to_datetime(df.pop('date'), format='%b %d %Y')
    df.set_index('datetime', inplace=True)
    df.drop(columns='7-day_moving_avg', inplace=True)

In [30]:
# Preview the first five rows of the cleaned North Dakota deaths table.
death_nd.head(n=5)

Unnamed: 0_level_0,new_deaths
datetime,Unnamed: 1_level_1
2020-12-23,17
2020-12-22,5
2020-12-21,5
2020-12-20,2
2020-12-19,0


## Organising dfs

Merging the case and death numbers for the United States as a whole and for each selected state, giving one dataframe per region.

In [31]:
# Attach the death counts to each case frame and derive per-100k rates.
# The three lists are positionally aligned: US, Washington, North Dakota,
# South Dakota.
case_dfs = [cases_us, cases_wa, cases_nd, cases_sd]
death_dfs = [death_us, death_wa, death_nd, death_sd]
population = [pop_us, pop_wa, pop_nd, pop_sd]

for df, death_df, pop in zip(case_dfs, death_dfs, population):
    # Assigning a Series as a column aligns it on the frame's index.
    df['new_deaths'] = death_df['new_deaths']
    # pop / 100000 is the population expressed in units of 100k people.
    df['deaths_per_100k'] = df['new_deaths'].div(pop / 100000)
    df['cases_per_100k'] = df['new_cases'].div(pop / 100000)
    

In [32]:
# Preview the first five rows of the merged US table.
cases_us.head(n=5)

Unnamed: 0_level_0,new_cases,new_deaths,deaths_per_100k,cases_per_100k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-23,221408,3362,1.013272,66.730104
2020-12-22,195860,3165,0.953899,59.030198
2020-12-21,183927,1725,0.519897,55.433714
2020-12-20,197616,1584,0.477401,59.559439
2020-12-19,201490,2624,0.790847,60.727023


In [33]:
# Preview the first five rows of the merged Washington table.
cases_wa.head(n=5)

Unnamed: 0_level_0,new_cases,new_deaths,deaths_per_100k,cases_per_100k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-23,2315,31,0.397584,29.690526
2020-12-22,1252,25,0.320632,16.057252
2020-12-21,1874,2,0.025651,24.034577
2020-12-20,2161,0,0.0,27.715433
2020-12-19,2332,0,0.0,29.908556


In [34]:
# check to make sure loop is working correctly
covid_us = cases_us
covid_us['new_deaths'] = death_us['new_deaths']
covid_us['deaths_per_100k'] = covid_us['new_deaths']/(pop_us/100000)
covid_us['cases_per_100k'] = covid_us['new_cases']/(pop_us/100000)
covid_us.head()

Unnamed: 0_level_0,new_cases,new_deaths,deaths_per_100k,cases_per_100k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-23,221408,3362,1.013272,66.730104
2020-12-22,195860,3165,0.953899,59.030198
2020-12-21,183927,1725,0.519897,55.433714
2020-12-20,197616,1584,0.477401,59.559439
2020-12-19,201490,2624,0.790847,60.727023


In [35]:
# check to see if loop is working correctly
covid_wa = cases_wa
covid_wa['new_deaths'] = death_wa['new_deaths']
covid_wa['deaths_per_100k'] = covid_wa['new_deaths']/(pop_wa/100000)
covid_wa['cases_per_100k'] = covid_wa['new_cases']/(pop_wa/100000)
covid_wa.head()

Unnamed: 0_level_0,new_cases,new_deaths,deaths_per_100k,cases_per_100k
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-23,2315,31,0.397584,29.690526
2020-12-22,1252,25,0.320632,16.057252
2020-12-21,1874,2,0.025651,24.034577
2020-12-20,2161,0,0.0,27.715433
2020-12-19,2332,0,0.0,29.908556


## Organising for plotting

Lists to iterate over when plotting COVID case rates and death rates.

In [42]:
# Regions split into two groups for plotting ("high" and "low" - presumably
# by rate; confirm against the plots). Within each group the frame, name and
# colour lists are positionally aligned.
covid_country_high = [
    covid_wa,
    covid_france,
    covid_germany,
    covid_brazil,
]
covid_country_names_high = [
    'Washington State',
    'France',
    'Germany',
    'Brazil',
]
covid_country_low = [
    covid_australia,
    covid_japan,
]
covid_country_names_low = [
    'Australia',
    'Japan',
]
# Alternative palette kept for reference (unused):
# colors_high = ['#f94144', '#f3722c', '#90be6d', '#577590']
# colors_low = ['#f8961e', '#43aa8b']
colors_high = [
    '#F8F32B',
    '#457B9D',
    '#1D3557',
    '#06D6A0',
]
colors_low = [
    '#E63946',
    '#A8DADC',
]