In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

from functools import reduce

In [None]:
mpl.rcParams['figure.figsize'] = (9, 5)

# Goal
My goal is to visualize various aspects of the `COVID-19` pandemic.

# Data sources

In this project I use data from the following sources:
- https://github.com/CSSEGISandData/COVID-19 - JHU CSSE COVID-19 Data.
- [GDP per capita PPP](https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD) - The World Bank.
- [Population](https://data.worldbank.org/indicator/SP.POP.TOTL) - The World Bank.
- [Urban Population](https://data.worldbank.org/indicator/SP.URB.TOTL.IN.ZS) - The World Bank.
- [Population living in slums](https://data.worldbank.org/indicator/EN.POP.SLUM.UR.ZS) - The World Bank.
- [Rural population](https://data.worldbank.org/indicator/SP.RUR.TOTL.ZS) - The World Bank.
- [Life expectancy at birth](https://data.worldbank.org/indicator/SP.DYN.LE00.IN) - The World Bank.
- [Current healthcare expenditure](https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS) - The World Bank.
- https://datahub.io/JohnSnowLabs/country-and-continent-codes-list - country codes and continents.

# Data preparation

## COVID-19 data
To obtain a copy of the data, clone the repository: <br>
`git clone https://github.com/CSSEGISandData/COVID-19`

In [None]:
path = './data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/'

In [None]:
conf = pd.read_csv(f'{path}/time_series_covid19_confirmed_global.csv')
recov = pd.read_csv(f'{path}/time_series_covid19_recovered_global.csv')
dead = pd.read_csv(f'{path}/time_series_covid19_deaths_global.csv')

In [None]:
def rename_countries(df):
    """
    
    Normalise selected country names in the `Country` column.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a `Country` column.
    
    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the names below replaced. The input
        frame itself is not modified.
    
    Notes
    -----
    Both Congo entries are mapped to the same name, so a later
    groupby on `Country` combines them.
    
    """
    
    # Fix country names.
    # This also helps with grouping (eg. Congo)
    new_names = {"Taiwan*": "Taiwan",
                 "Korea, South": "Korea",
                 "North Macedonia": "Macedonia",
                 "Cabo Verde": "Cape Verde",
                 "Congo (Brazzaville)": "Congo",
                 "Congo (Kinshasa)": "Congo"}
    
    # Work on a copy: callers may pass a filtered slice of another
    # frame, and writing into that triggers SettingWithCopyWarning.
    df = df.copy()
    df['Country'] = df['Country'].replace(new_names)
    
    return df

In [None]:
def process_data(df):
    """
    
    Convert a raw JHU time series table (one row per region, one
    column per date) into one row per date and one column per country.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with `Lat`, `Long`, `Province/State`,
        `Country/Region` and one column per date.
    
    Returns
    -------
    pandas.DataFrame
        `Date` column (datetime) followed by one integer column per
        country, sorted by country name, with a fresh 0..n-1 index.
    
    """
    
    # Drop columns
    df = df.drop(['Lat', 'Long', 'Province/State'], axis=1)
    df = df.rename(columns={"Country/Region": "Country"})
    
    # Rename countries
    df = rename_countries(df=df)
    
    # Enforce countries are unique (provinces are summed up)
    df = df.groupby('Country').sum()
    
    # Switch to column format: dates become the index, countries the
    # columns. Only numeric data is transposed, so dtypes survive.
    df = df.transpose()
    df.columns.name = None
    
    # Convert to ints
    df = df.astype(int)
    
    # Reorder & Sort
    df = df[sorted(df.columns)]
    
    # Convert dates (the index still holds the original date headers)
    df.insert(0, 'Date', pd.to_datetime(df.index))
    
    # Reset index
    df = df.reset_index(drop=True)
    
    return df

In [None]:
# Reshape each raw table with `process_data`.
# NOTE: the raw frames are overwritten by the processed ones, so this
# cell cannot be run twice in a row: `process_data` drops the `Lat`,
# `Long` and `Province/State` columns, which no longer exist after the
# first run. Re-run the `pd.read_csv` cell first.
conf = process_data(df=conf)
recov = process_data(df=recov)
dead = process_data(df=dead)
conf.tail()

In [None]:
# Cruise ships are listed next to countries in the source data; remove them.
boats = ['Diamond Princess', 'MS Zaandam']

# errors='ignore' keeps the cell re-runnable and tolerant of data
# snapshots that do not contain every ship.
conf = conf.drop(boats, axis=1, errors='ignore')
recov = recov.drop(boats, axis=1, errors='ignore')
dead = dead.drop(boats, axis=1, errors='ignore')

# Active = confirmed - recovered - fatal, aligned on index and country columns.
active = conf.drop(['Date'], axis=1) - recov.drop(['Date'], axis=1) - dead.drop(['Date'], axis=1)
active['Date'] = conf['Date']

## Helper functions

In [None]:
dataframes = [conf, recov, dead]
names = ['Confirmed', 'Recovered', 'Deaths']

In [None]:
def get_country_ts(country, dataframes, columns):
    """
    
    Extract data for specific country.
    
    Parameters
    ----------
    country : str
        Column name of the country in every frame of `dataframes`.
    dataframes : list of pandas.DataFrame
        Frames with a `Date` column and one column per country.
    columns : list of str
        Output column names, one per frame, in the same order.
    
    Returns
    -------
    pandas.DataFrame
        `Date` plus one column per input frame, outer-joined on `Date`.
    
    Raises
    ------
    ValueError
        If `columns` and `dataframes` differ in length.
    
    Notes
    -----
    Apply backfill to NaN's.
    
    """
    
    if len(columns) != len(dataframes):
        raise ValueError(
            f'Expected {len(dataframes)} column names, got {len(columns)}')
    
    ctry = list()
    for i, df in enumerate(dataframes):
        tmp = df.loc[:, ['Date', country]]
        # Every frame uses the same country column name. Merging them
        # as-is makes pandas invent `_x`/`_y` suffixes, which collide
        # from the fourth frame onwards. A unique placeholder per
        # frame avoids any overlap.
        tmp.columns = ['Date', i]
        ctry.append(tmp)
    ctry = reduce(lambda x, y: pd.merge(x, y, on='Date', how='outer'), ctry)
    ctry.columns = ['Date'] + list(columns)
    ctry = ctry.bfill()
    
    return ctry

get_country_ts(country='Poland', 
               dataframes=dataframes, 
               columns=names).tail()

In [None]:
def get_country_stats(dataframes, names):
    """
    
    Build a per-country summary from the last row of every frame.
    
    Parameters
    ----------
    dataframes : list of pandas.DataFrame
        Frames with a `Date` column and one column per country.
    names : list of str
        Name of the output column for each frame, in the same order.
    
    Returns
    -------
    pandas.DataFrame
        `Country` plus one column per frame, outer-joined on `Country`.
    
    """
    
    def _last_row(frame, label):
        # Last row without `Date`, flipped to one row per country.
        latest = frame.drop('Date', axis=1).tail(1)
        latest = latest.transpose().reset_index()
        latest.columns = ['Country', label]
        return latest
    
    per_frame = [_last_row(frame, label)
                 for frame, label in zip(dataframes, names)]
    
    return reduce(lambda left, right: pd.merge(left, right, on='Country', how='outer'),
                  per_frame)

get_country_stats(dataframes, names).head()

## Extract mortality rate

In [None]:
def extract_mortality(country):
    """
    
    Calculate mortality rate over time
    for specific country.
    
    Parameters
    ----------
    country : str
        Country column present in both `conf` and `dead`.
    
    Returns
    -------
    pandas.DataFrame
        `Date` and a column named after `country` holding
        deaths / confirmed in percent, rounded to two decimals.
        Only dates with at least one confirmed case are kept.
    
    """
    
    df = get_country_ts(country=country, 
                        dataframes=[conf, dead], 
                        columns=['Confirmed', 'Deaths'])
    
    # Rows without confirmed cases would divide by zero. Take a copy
    # so the new column is not written into a slice of the frame
    # (SettingWithCopyWarning).
    df = df[df['Confirmed'] > 0].copy()
    df['Mortality'] = np.round(df['Deaths'] / df['Confirmed'] * 100, 2)
    df = df[['Date', 'Mortality']]
    df.columns = ['Date', country]
    
    return df

extract_mortality('Poland').tail()

In [None]:
# Mortality series for every country, joined into one table.
all_countries = sorted(set(conf.drop('Date', axis=1).columns))
mort = [extract_mortality(country=c) for c in all_countries]

# `pd.merge` defaults to an inner join, so only dates present in
# every country's series are kept.
mort = reduce(lambda left, right: pd.merge(left, right, on='Date'), mort)

mort.tail()

In [None]:
print(conf.shape)
print(recov.shape)
print(dead.shape)
print(active.shape)
print(mort.shape)

In [None]:
def count_na(df):
    """
    
    Count missing values per column, most affected columns first.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a `Date` column, which is excluded from the count.
    
    Returns
    -------
    pandas.DataFrame
        One row per column of `df` with a single `Missing` column,
        sorted in descending order.
    
    """
    missing = df.drop('Date', axis=1).isna().sum(axis=0)
    missing.name = 'Missing'
    missing = missing.to_frame()
    # sort_values returns a new frame; the result has to be kept.
    missing = missing.sort_values('Missing', ascending=False)
    return missing

In [None]:
count_na(conf).head()

In [None]:
count_na(recov).head()

In [None]:
count_na(dead).head()

In [None]:
count_na(active).head()

In [None]:
count_na(mort).head()

In [None]:
dataframes = [conf, recov, dead, active, mort]
names = ['Confirmed', 'Recovered', 'Deaths', 'Active', 'Mortality']
country_stats = get_country_stats(dataframes, names)
country_stats.head()

In [None]:
country_stats.isna().sum(axis=0)

## First order differences

In [None]:
def get_daily_changes(df):   
    """
    
    Calculate daily change in case
    data, ie apply difference operator.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a `Date` column and one numeric column per country.
    
    Returns
    -------
    pandas.DataFrame
        Row-to-row differences of every country column followed by
        `Date`. Rows containing NaN (in particular the first row,
        which has no predecessor) are dropped.
    
    """
    diff = df.drop(['Date'], axis=1).diff()
    diff['Date'] = df['Date']
    diff = diff.dropna()
    return diff

In [None]:
conf_diff = get_daily_changes(df=conf)
recov_diff = get_daily_changes(df=recov)
dead_diff = get_daily_changes(df=dead)
active_diff = get_daily_changes(df=active)
conf_diff.tail()

## Coordinate data

In [None]:
# Country coordinates, taken from the raw confirmed-cases file.
coords = pd.read_csv(f'{path}/time_series_covid19_confirmed_global.csv')
coords = coords.rename(columns={"Country/Region": "Country"})   
# .copy(): `rename_countries` assigns to the `Country` column, which
# must not happen on a filtered slice (SettingWithCopyWarning).
coords = coords.loc[~coords['Country'].isin(boats)].copy()
coords = rename_countries(df=coords)
coords = coords[['Country', 'Lat', 'Long']]
# Countries reported per province get the plain mean of all their rows.
coords = coords.groupby('Country', as_index=False).mean()
coords = coords.sort_values('Country')
coords = coords.reset_index(drop=True)
coords.head()

## Continent data

In [None]:
# Country -> continent lookup built from the datahub country list.
cont_path = './data/datahub/countries.csv'
unused_columns = ['Continent_Code', 
                  'Two_Letter_Country_Code', 
                  'Country_Number']
cont_map = pd.read_csv(cont_path).drop(unused_columns, axis=1)

# Keep only the part of the name before the first ", ".
# NOTE(review): different source names that share this prefix collapse
# to one `Country` value and only the first row survives
# `drop_duplicates` below - verify the surviving country code.
cont_map['Country'] = cont_map['Country_Name'].apply(lambda x: x.split(", ")[0])

cont_map = (
    cont_map
    .rename(columns={"Continent_Name": "Continent", 'Three_Letter_Country_Code': 'Country Code'})
    .drop(['Country_Name'], axis=1)
    .drop_duplicates(subset=['Country'])
)

cont_map.head()

In [None]:
countries_covid = set(coords['Country'])

In [None]:
# Change values in countries.csv to match covid data.
# Each pair is (name in countries.csv, name in the covid data).
to_swap = [('Russian Federation', 'Russia'),
           ('Slovakia (Slovak Republic)', 'Slovakia'),
           ('Kyrgyz Republic', 'Kyrgyzstan'),
           ('Syrian Arab Republic', 'Syria'),
           ('Libyan Arab Jamahiriya', 'Libya'),
           ('Korea, South', 'Korea'),
           ('Brunei Darussalam', 'Brunei'),
           ('Cabo Verde', 'Cape Verde'),
           ('Holy See (Vatican City State)', 'Holy See'),
           ('United States of America', 'US'),
           ('United Kingdom of Great Britain & Northern Ireland', 'United Kingdom'),
           ("Lao People's Democratic Republic", 'Laos'),
           ('Myanmar', 'Burma'),
           ('Czech Republic', 'Czechia'),
           ('Swaziland',  'Eswatini')]

# No target name is also a source name, so all pairs can be applied
# in a single exact-match replacement.
cont_map['Country'] = cont_map['Country'].replace(dict(to_swap))

In [None]:
countries_cont_map = set(cont_map['Country'])

In [None]:
countries_covid.difference(countries_cont_map)

In [None]:
# Attach continent and country code to every covid country. Rows with
# any missing value after the left join (for example, no match in
# `cont_map`) are dropped.
ctry_to_cont = coords.merge(cont_map, how='left', on='Country').dropna()
ctry_to_cont.head()

In [None]:
print(ctry_to_cont.shape)

## World Bank data

In [None]:
wb_path = './data/world_bank/'

In [None]:
def get_world_bank_data(path, desc, year='2019'):
    """
    
    Get World Bank data into usable format.
    
    Parameters
    ----------
    path : str or file-like
        World Bank indicator CSV (four preamble lines, then a header
        with `Country Name`, `Country Code`, `Indicator Name`,
        `Indicator Code` and one column per year).
    desc : str
        Name of the value column in the result.
    year : str or int, default '2019'
        Year to report. Missing values are replaced by the most
        recent earlier year that has data.
    
    Returns
    -------
    pandas.DataFrame
        `Country Code` and `desc` (float, two decimals). Rows with
        no data up to `year` are dropped.
    
    Notes
    -----
    Forward filling is applied rowwise, on the year columns only, so
    identifier text can never leak into the values.
    
    """
    
    year = str(year)
    
    raw = pd.read_csv(path, skiprows=4)
    
    # Year columns up to and including the requested one.
    years = raw.drop(['Country Name', 'Country Code',
                      'Indicator Name', 'Indicator Code'], axis=1)
    years = years.loc[:, :year]
    years = years.apply(pd.to_numeric, errors='coerce')
    
    latest = years.ffill(axis=1)[year].astype(float).round(2)
    
    df = pd.DataFrame({'Country Code': raw['Country Code'], desc: latest})
    df = df.dropna(subset=[desc])
    
    return df

In [None]:
wb_le = 'API_SP.DYN.LE00.IN_DS2_en_csv_v2_1308162.csv'
life_expectancy = get_world_bank_data(path=f'{wb_path}/{wb_le}',
                                      desc='Life Expectancy')
life_expectancy.head()

In [None]:
wb_gdp = 'API_NY.GDP.PCAP.PP.CD_DS2_en_csv_v2_1217517.csv'
gdp_per_capita = get_world_bank_data(path=f'{wb_path}/{wb_gdp}',
                                     desc='GDP Per Capita')
gdp_per_capita.head()

In [None]:
wb_pop = 'API_SP.POP.TOTL_DS2_en_csv_v2_1308146.csv'
population = get_world_bank_data(path=f'{wb_path}/{wb_pop}',
                                     desc='Population')
population.head()

In [None]:
wb_urb = 'API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_1219669.csv'
urban_population = get_world_bank_data(path=f'{wb_path}/{wb_urb}',
                                     desc='Urban Population %')
urban_population.head()

In [None]:
wb_slum = 'API_EN.POP.SLUM.UR.ZS_DS2_en_csv_v2_1221614.csv'
slum_population = get_world_bank_data(path=f'{wb_path}/{wb_slum}',
                                     desc='Slum Population %')
slum_population.head()

In [None]:
wb_rur = 'API_SP.RUR.TOTL.ZS_DS2_en_csv_v2_1222914.csv'
rural_population = get_world_bank_data(path=f'{wb_path}/{wb_rur}',
                                     desc='Rural Population %')
rural_population.head()

In [None]:
wb_hc = 'API_SH.XPD.CHEX.GD.ZS_DS2_en_csv_v2_1217782.csv'
gdp_healthcare = get_world_bank_data(path=f'{wb_path}/{wb_hc}',
                                     desc='GDP Healthcare %')
gdp_healthcare.head()

In [None]:
world_bank = [life_expectancy,
              gdp_per_capita,
              population,
              urban_population,
              slum_population,
              rural_population,
              gdp_healthcare]

world_bank = reduce(lambda x, y: pd.merge(x, y, on='Country Code', how='outer'), world_bank)
world_bank.head()

In [None]:
world_bank.shape

In [None]:
# Number of missing values per World Bank column, largest first;
# columns without gaps are left out.
wb_missing = (
    world_bank
    .isnull()
    .sum(axis=0)
    .rename('Missing')
    .to_frame()
    .sort_values('Missing', ascending=False)
    .reset_index()
    .rename(columns={'index': 'Column'})
)
wb_missing = wb_missing[wb_missing['Missing'] > 0]
wb_missing

## Merge world bank with COVID-19

In [None]:
# Check missing countries
S1 = set(ctry_to_cont['Country Code'])
S2 = set(world_bank['Country Code'])
sorted(S1.difference(S2))

In [None]:
ctry_to_cont[ctry_to_cont['Country Code'] == 'ESH']

In [None]:
ctry_to_cont[ctry_to_cont['Country Code'] == 'TWN']

In [None]:
ctry_to_cont[ctry_to_cont['Country Code'] == 'VAT']

In [None]:
merged = pd.merge(ctry_to_cont, country_stats, on='Country', how='outer')
merged = pd.merge(merged, world_bank, on='Country Code', how='outer')
merged.shape

# Data - summary

After the lengthy process of preparing the data I feel obliged to summarize in short some of the key datasets.

From the original `COVID-19` data we created:
- `conf` - Confirmed cases. Timeseries, by country.
- `recov` - Recovered cases. Timeseries, by country.
- `dead` - Fatal cases. Timeseries, by country.
- `active` - Active cases. Calculation: `conf` - `recov` - `dead`. Timeseries, by country.
- `coords` - Countries with latitude and longitude data.
- `country_stats` - Case data summarized by country.

By transforming the above we obtain:
- `conf_diff` - Confirmed cases daily change. Calculated using the difference operator. Timeseries, by country.
- `active_diff` - Active cases daily change. Calculated using the difference operator. Timeseries, by country.
- `recov_diff` - Recovered cases daily change. Calculated using the difference operator. Timeseries, by country.
- `dead_diff` - Fatal cases daily change. Calculated using the difference operator. Timeseries, by country.
- `mort` - Mortality, expressed by `dead`/`conf`. Timeseries, by country.

From `coords` and the datahub country and continent list we create:
- `ctry_to_cont` - Map each country to continent.

Using the data above and data from the World Bank we create:
- `world_bank` - Combines all the data from the World Bank.
- `merged` - Combines COVID-19 summary statistics, continents, gdp and population data.

The `COVID-19` data used in this notebook starts on:

In [None]:
str(conf['Date'].min().date())

and ends on:

In [None]:
str(conf['Date'].max().date())

# COVID19 - Globally

First we plot the total number of cases around the world.

In [None]:
all_countries = sorted(set(coords['Country']))
dataframes = [conf, recov, dead, active]
names = ['Confirmed cases',
         'Recovered cases',
         'Fatal cases',
         'Active cases']

In [None]:
# for name,df in zip(names, dataframes):
#     plt.plot(df['Date'],
#              np.log(df.drop('Date', axis=1).sum(axis=1)),
#              label=name)
# plt.ylabel('Cases, Log scale')
# plt.legend(loc='best')
# plt.xticks(rotation=90)
# plt.tight_layout()
# plt.show()

# COVID19 - By continent

We drill down further into the data visualizing cases data at the continent level.

In [None]:
def continent_plot(df, name):
    """
    
    Plot cases over time by continent.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a `Date` column and one column per country.
    name : str
        Plot title.
    
    Notes
    -----
    Uses the global `ctry_to_cont` to assign countries to continents.
    The y axis is logarithmic, as stated by its label.
    
    """
    
    # sorted(): set iteration order differs between kernel sessions,
    # which would shuffle line colours and legend entries.
    continents = sorted(set(ctry_to_cont['Continent']))
    for cont in continents:

        C = ctry_to_cont.loc[ctry_to_cont['Continent'] == cont, 'Country']
        C = C.to_list()

        plt.plot(df['Date'], df[C].sum(axis=1), label=cont)

    plt.yscale('log')
    plt.legend(loc='best')
    plt.title(f'{name}')
    plt.ylabel('Cases, log scale')
    plt.tight_layout()
    plt.show()    

In [None]:
# NOTE: `break` ends the loop after the first pass, so only the first
# entry of `names` / `dataframes` is plotted.
# NOTE(review): presumably intentional to keep the output short - confirm.
for name,df in zip(names, dataframes):
    continent_plot(df=df, name=name)  
    break

# COVID - Country level

Finally we can take a look at the data on a country level. First we show the countries with the most cases.

In [None]:
def sorted_bar_plot(df, name, n=10):
    """
    
    Bar plot of countries with the most cases
    of a certain type.
    
    Parameters
    ----------
    df : pandas.DataFrame
        Cumulative time series with a `Date` column and one column
        per country.
    name : str
        Plot title and name of the value column.
    n : int, default 10
        Number of countries to show.
    
    Notes
    -----
    The ranking uses the last row of `df`. The series are cumulative,
    so summing them over time would count every case once per day
    since it was reported.
    
    """
        
    # `Date` is not a case count and must not enter the ranking.
    latest = df.drop(columns='Date', errors='ignore').iloc[-1]
    
    top = latest.rename(name)
    top = top.sort_values(ascending=False)
    top = top.head(n)
    top = top.rename_axis('Country').reset_index()

    plt.bar(top['Country'], top[name])
    plt.yscale('log')
    plt.xticks(rotation=90)
    plt.title(f'{name}')
    plt.ylabel('Cases, log scale')
    plt.tight_layout()
    plt.show()    

In [None]:
# NOTE: `break` ends the loop after the first pass, so only the first
# entry of `names` / `dataframes` is plotted.
# NOTE(review): presumably intentional to keep the output short - confirm.
for name,df in zip(names, dataframes):
    sorted_bar_plot(df=df, name=name) 
    break

Then we show countries with the highest mortality rate.

In [None]:
# Ten countries with the highest latest mortality rate.
high_mort = (
    country_stats[['Country', 'Mortality']]
    .sort_values('Mortality', ascending=False)
    .reset_index(drop=True)
    .head(10)
)
high_mort.columns = ['Country', 'Mortality Rate']

plt.bar(high_mort['Country'], height=high_mort['Mortality Rate'])
plt.xticks(rotation=90)
plt.ylabel('Mortality rate (%)')
plt.title('Countries with highest mortality rate')
plt.tight_layout()
plt.show()

In [None]:
def country_plot_cases(country):
    """
    
    Plot confirmed, recovered, fatal and active cases over time
    for one country, starting with its first confirmed case.
    
    Parameters
    ----------
    country : str
        Country column present in `conf`, `recov`, `dead` and `active`.
    
    """
    
    # Legend label -> source frame, in plotting order.
    sources = {'Confirmed': conf,
               'Recovered': recov,
               'Fatal': dead,
               'Active': active}
    
    df = get_country_ts(country=country,
                        dataframes=list(sources.values()),
                        columns=list(sources.keys()))
    
    has_cases = df['Confirmed'] > 0
    df = df[has_cases]
    
    for label in sources:
        plt.plot(df['Date'], df[label], label=label)
    
    plt.title(f'{country}')
    plt.ylabel('Cases')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()      

We can inspect the data for specific countries.

In [None]:
country_plot_cases(country='Germany')

We plot the daily increase in cases for specific countries.

In [None]:
def country_plot_chg(country, n=7):
    """
    
    Plot daily new confirmed cases for one country together
    with their n day moving average.
    
    Parameters
    ----------
    country : str
        Country column present in `conf_diff`.
    n : int, default 7
        Window of the moving average, in days.
    
    """
    df = get_country_ts(country=country,
                        dataframes=[conf_diff],
                        columns=['Confirmed Chg'])
    df = df.rename(columns={'Confirmed Chg': 'New cases'})
    
    dates = df['Date']
    new_cases = df['New cases']
    smoothed = new_cases.rolling(n).mean()
    
    # Raw daily values: faint line with a shaded area underneath.
    plt.plot(dates, new_cases, label='New cases', alpha=1/2)
    plt.fill_between(dates, y1=0, y2=new_cases, alpha=1/4)
    
    # Moving average on top.
    plt.plot(dates, smoothed, label=f'{n} day average', c='black')
    
    plt.title(f'{country} - daily new cases')
    plt.ylabel('Cases')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()  

In [None]:
country_plot_chg(country='Brazil')        

# Exponential growth

We proceed to visualize the early days of the pandemic. In most cases the growth was exponential as will be seen.
First we define a function to calculate exponential growth. Secondly we create a dataset of timeseries reindexed to begin at the point in time `t0`, defined as the first day on which a country reported at least a hundred confirmed cases. Then we plot the data.

In [None]:
def exp_growth(a, b, t, tau):
    """
    
    Evaluate the exponential growth curve a * b ** (t / tau).
    
    Parameters
    ----------
    a : number
        Initial value.
    b : number
        Growth factor.
    t : number or array-like
        Time.
    tau : number
        Time required for increase by factor of b.
    
    Returns
    -------
    Value of the curve at `t`, with the same shape as `t`.
        
    Notes
    -----
    See https://en.wikipedia.org/wiki/Exponential_growth 
    for details.
    
    """
    
    elapsed_periods = t / tau
    growth = np.power(b, elapsed_periods)
    return a * growth

In [None]:
# Confirmed cases per country, re-indexed so that row 0 is the first
# day with at least 100 confirmed cases.
conf_t0 = list()
for country in all_countries:
    t0 = get_country_ts(country=country, dataframes=[conf], columns=['Confirmed'])
    t0 = t0.loc[t0['Confirmed'] >= 100, 'Confirmed']
    t0 = t0.reset_index(drop=True)
    conf_t0.append(t0.rename(country))

# A single column-wise concat aligns all series on the "days since
# 100 cases" index. Merging same-named columns one by one produces
# clashing `_x`/`_y` suffixes instead.
conf_t0 = pd.concat(conf_t0, axis=1)
conf_t0.columns = all_countries
conf_t0 = conf_t0.head(100)

# Series.max skips NaN; the builtin max() does not handle it reliably.
max_cases = conf_t0.max().max()

In [None]:
def growth_plot(df, countries, periods, steps=50):
    """
    
    Plot growth curves, log scale.
    
    Inputs
    ------
    df :
        `conf_t0` dataframe that contains confirmed
        cases reindexed to start with the date a 
        country had  >= 100 cases.
    countries : list
        List of countries
    periods : list of ints
        Doubling periods for growth curves.
    steps : int
        Number of data points to use.
    
    Notes
    -----
    Values are log-transformed with `np.log` before plotting, so the
    y axis shows natural logarithms.
    
    """
    
    countries = sorted(countries)
    
    # Extract mean and use as starting point for 
    # exponential growth curves.
    a = df.mean(axis=1).iloc[0]
    b = 2

    # List of growth curves
    growth = list()
    for period in periods:
        g = exp_growth(a=a, 
                       b=b, 
                       t=np.arange(steps), 
                       tau=period)
        g = np.log(g)
        growth.append(list(g))

    for g,p in zip(growth, periods):

        # Draw growth curves
        plt.plot(range(steps), g, 
                 c='black', 
                 linestyle='dashed', 
                 alpha=1/2)

        if p == 1:
            s = 'Double every day'
        else:
            s = f'Double every {str(p)} days'       

        # Draw text outside
        plt.text(x=steps,
                 y=g[steps - 1],
                 s=s, 
                 alpha=3/4, 
                 horizontalalignment='left',
                 verticalalignment='center',
                 rotation_mode='anchor')

    # Draw country level data from the frame that was passed in,
    # not from the global `conf_t0`.
    plot_df = df[countries].head(steps)
    for c in countries:
        plt.plot(range(len(plot_df)), np.log(plot_df[c]), label=c)

    plt.xlim(0, steps-1)
    plt.legend(loc='best')
    plt.ylabel('Confirmed cases, log scale')
    plt.xlabel('Days since 100 cases')
    plt.tight_layout()
    plt.show()

In [None]:
growth_plot(df=conf_t0, 
            countries=['US', 'Russia', 'Germany', 'France'], 
            periods=[2, 4, 8], 
            steps=50)

# Economic data

We proceed to visually inspect the relationship between the `COVID-19` dataset and data downloaded from the World Bank. Given that the `Population` column varies wildly between countries, we use per-million figures to make comparisons.

In [None]:
figsize = (5, 5)

In [None]:
# Per-million figures make countries of different size comparable.
economic = merged.copy()

population_mln = economic['Population'] /  10**6
for source, target in [('Confirmed', 'Cases Per Milion'),
                       ('Deaths', 'Deaths Per Milion')]:
    economic[target] = (economic[source] / population_mln).round(2)

# Location columns are not needed for the comparisons below.
economic = economic.drop(['Lat', 'Long', 'Country Code', 'Continent'], axis=1)
economic.head()

In [None]:
economic.sort_values('Mortality', ascending=False).head()

## GDP Healthcare
We compare the percentage of GDP spent on healthcare with cases per million.

In [None]:
# Scatter of healthcare spending (x) against each per-million metric (y).
cols = ['Cases Per Milion', 'Deaths Per Milion']

# NOTE: `break` ends the loop after the first pass, so only the first
# entry of `cols` is plotted.
# NOTE(review): presumably intentional to keep the output short - confirm.
for col in cols:
    plt.figure(figsize=figsize)
    plt.scatter(economic['GDP Healthcare %'], economic[col], s=2)
    plt.xlabel('GDP Healthcare %')
    plt.ylabel(f'{col}')
    plt.tight_layout()
    plt.show()
    break

## Population

We compare the proportion of people living in different areas to the mortality rate.

In [None]:
# Scatter of each population share (x) against the mortality rate (y).
cols = ['Urban Population %', 
        'Rural Population %', 
        'Slum Population %']

# NOTE: `break` ends the loop after the first pass, so only the first
# entry of `cols` is plotted.
# NOTE(review): presumably intentional to keep the output short - confirm.
for col in cols:
    plt.figure(figsize=figsize)
    plt.scatter(economic[col], economic['Mortality'], s=2)
    plt.ylabel('Mortality %')
    plt.xlabel(f'{col}')
    plt.tight_layout()
    plt.show()
    break    

## GDP & Healthcare
We compare life expectancy, healthcare spending and gdp per capita to the mortality rate.

In [None]:
# Scatter of each indicator (x) against the mortality rate (y).
cols = ['Life Expectancy', 'GDP Healthcare %', 'GDP Per Capita']

# NOTE: `break` ends the loop after the first pass, so only the first
# entry of `cols` is plotted.
# NOTE(review): presumably intentional to keep the output short - confirm.
for col in cols:
    plt.figure(figsize=figsize)
    plt.scatter(economic[col], economic['Mortality'], s=2)
    plt.ylabel('Mortality %')
    plt.xlabel(f'{col}')
    plt.tight_layout()
    plt.show()
    break    

# Fancy plot

Visual for repo readme.

In [None]:
countries = ['Germany', 
             'France', 
             'Italy', 
             'Spain', 
             'United Kingdom', 
             'Russia', 
             'India', 
             'Brazil',
             'US', 
             'Poland', 
             'Mexico']

width = 1600          # width of the saved image in pixels
height = width / 2
dpi = 200
period = 7            # window of the moving average, in days
step = 14             # distance between x tick labels, in days
label_size = 12
n_clabels = 6         # number of colour bar ticks before de-duplication

countries = sorted(countries)

# Moving average of daily new cases. The dates stay attached as index
# so that the tick labels cannot drift out of step with the data.
plot_df = conf_diff.set_index('Date')[countries]
plot_df = plot_df.rename(columns={'United Kingdom': 'UK'})
countries = plot_df.columns.to_list()
plot_df = plot_df.rolling(period).mean()
plot_df = plot_df.dropna()
dates = plot_df.index

# One row per country, one column per day.
plot_df = plot_df.to_numpy().astype(float).transpose()

# Square root compresses the range for colouring. Negative averages
# (downward corrections in the source data) are clipped to zero first,
# otherwise they would turn into NaN.
plot_df = np.sqrt(np.clip(plot_df, 0, None))

xticks = range(plot_df.shape[1])[::step]
xlabels = [x.date() for x in dates][::step]

yticks = range(len(countries))
ylabels = countries

# Colour bar ticks sit on the square-root scale; their labels show the
# squared-back case counts, rounded to thousands.
cticks = np.round(np.linspace(0, np.max(plot_df), n_clabels), -1)
cticks = sorted(set(cticks.astype(int)))
clabels = np.power(cticks, 2)
clabels = [round(x, -3) for x in clabels]
clabels = list(map(str, clabels))

plt.figure(figsize=(width / dpi, height / dpi))
plt.imshow(plot_df, aspect='auto', interpolation='nearest', cmap='hot')

plt.yticks(ticks=yticks,
           labels=ylabels, 
           fontsize=label_size, 
           verticalalignment='center')

plt.xticks(ticks=xticks,
           labels=xlabels,
           rotation=45, 
           fontsize=label_size, 
           horizontalalignment='right')

cbar = plt.colorbar()
cbar.set_ticks(cticks)
cbar.set_ticklabels(clabels)
cbar.ax.tick_params(labelsize=label_size) 

plt.title('New COVID-19 cases', fontsize=20)

plt.tight_layout()
# figsize is given in inches as pixels / dpi, so saving with the same
# dpi yields a width x height pixel image.
plt.savefig('img/covid_tiles.png', dpi=dpi)

plt.show()