## Project Setup

In [198]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns
from pathlib import Path
from datetime import datetime

In [199]:
# Register pandas' formatters/converters with matplotlib before any plotting happens.
# NOTE(review): presumably needed so date-like axes render correctly — confirm against the plotting cells.
pd.plotting.register_matplotlib_converters()

In [200]:
# The datasets folder is a sibling of the notebook's working directory:
# <cwd>/../datasets. Kept as a plain string because it is interpolated into
# f-string paths.
datasets_dir = str(Path(os.path.abspath('')).parent / "datasets")

## COVID-19 Dataset

### Dataset Cleanup

In [201]:
# Load the untouched owid-covid-data.csv export; every later frame derives from
# this one, so it is never modified.
covid_csv_path = f"{datasets_dir}/owid-covid-data.csv"
raw_covid_df = pd.read_csv(covid_csv_path)

In [202]:
# Peek at the last rows of the raw frame. In this run they belong to the
# "International" aggregate, which has no iso_code or continent.
raw_covid_df.tail(3)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
47956,,,International,2020-10-02,696.0,,,7.0,,,...,,,,,,,,,,
47957,,,International,2020-10-03,696.0,,,7.0,,,...,,,,,,,,,,
47958,,,International,2020-10-04,696.0,,,7.0,,,...,,,,,,,,,,


In [208]:
def mon_year_of(str_date):
    """Convert a ``YYYY-MM-DD`` date string to its ``YYYY-MM`` month key.

    Parameters
    ----------
    str_date : str
        Calendar date formatted as ``%Y-%m-%d`` (e.g. ``"2020-10-04"``).

    Returns
    -------
    str
        Year and month of ``str_date`` formatted as ``%Y-%m``
        (e.g. ``"2020-10"``).

    Raises
    ------
    ValueError
        If ``str_date`` does not match ``%Y-%m-%d``.
    TypeError
        If ``str_date`` is not a string (for example a missing value).
    """
    # Round-trip through datetime so malformed dates are rejected instead of
    # being silently sliced.
    return datetime.strptime(str_date, "%Y-%m-%d").strftime('%Y-%m')

In [209]:
# Keep only rows that carry a continent. reset_index() is called without
# drop=True, so the previous row labels are preserved in an `index` column.
staging_covid_df = raw_covid_df.dropna(subset=['continent']).reset_index()

In [210]:
# Derive the YYYY-MM bucket used for monthly aggregation. Mapping over the
# single `date` column avoids building a Series per row, which is what
# DataFrame.apply(axis=1) does, and yields a proper empty Series on an empty frame.
staging_covid_df['year_month'] = staging_covid_df['date'].map(mon_year_of)

In [211]:
# Sanity check after cleanup: the last rows should now have a continent and
# carry the derived `year_month` column.
staging_covid_df.tail(3)

Unnamed: 0,index,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,year_month
47398,47398,ZWE,Africa,Zimbabwe,2020-10-02,7850.0,12.0,14.0,228.0,0.0,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,2020-10
47399,47399,ZWE,Africa,Zimbabwe,2020-10-03,7858.0,8.0,10.143,228.0,0.0,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,2020-10
47400,47400,ZWE,Africa,Zimbabwe,2020-10-04,7885.0,27.0,11.714,228.0,0.0,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,2020-10


In [212]:
# Monthly totals per country: sum the daily new cases and deaths inside each
# (year_month, continent, location, iso_code) group, then list the newest month
# first with continents and locations in alphabetical order.
covid_df = (
    staging_covid_df
    .groupby(['year_month', 'continent', 'location', 'iso_code'], as_index=False)
    # Use the 'sum' string alias: passing the builtin `sum` callable to agg is
    # deprecated in recent pandas and emits a FutureWarning.
    .agg({'new_cases': 'sum', 'new_deaths': 'sum'})
    .sort_values(by=['year_month', 'continent', 'location'],
                 ascending=[False, True, True])
)

### Plotting