# COVID-19 - Fatality rates
> Evolution of fatality rates for different countries

- comments: true
- author: Caglar Cakan
- categories: [covid19]
- permalink: /covid-fatality-rates/
- image: images/covid-fatality-rates.png
- license: Creative Commons

In [1]:
#hide_input
import datetime
# Stamp the rendered page with the moment this cell was executed.
build_time = datetime.datetime.now()
print('This page was last built on', build_time.strftime("%d.%m.%y %H:%M:%S"))

This page was last built on 11.05.20 01:12:28


In [2]:
#hide

# altair code borrowed from Pratap Vardhan, https://github.com/machine-learning-apps/covid19-dashboard/blob/master/_notebooks/2020-03-17-covid19-compare-country-death-trajectories.ipynb
# load_timeseries code borrowed from Thomas Wiecki & Hamel Husain, https://github.com/machine-learning-apps/covid19-dashboard/blob/master/_notebooks/2020-03-16-covid19_growth_bayes.ipynb
# Thanks!

import numpy as np

import pandas as pd

import requests
import io
import altair as alt

In [3]:
#hide
def load_timeseries(name, 
                    base_url='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series'):
    """Download one global time series CSV and reshape it to one row per date and country.

    Args:
        name: Series name inserted into the file name
            ``time_series_covid19_{name}_global.csv`` (this notebook uses
            "deaths" and "confirmed").
        base_url: Folder URL the CSV is downloaded from.

    Returns:
        DataFrame indexed by ``date`` (datetime) with the columns ``country``,
        ``type`` (``name`` lower-cased) and ``cases`` (summed over all states
        of a country), sorted by country and then by date.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    # Thanks to kasparthommen for the suggestion to directly download
    url = f'{base_url}/time_series_covid19_{name}_global.csv'
    response = requests.get(url, timeout=30)
    # Fail loudly instead of trying to parse an HTML error page as CSV.
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.text),
                     index_col=['Country/Region', 'Province/State', 'Lat', 'Long'])
    df['type'] = name.lower()
    df.columns.name = 'date'

    # Wide (one column per date) -> long (one row per date and region).
    df = (df.set_index('type', append=True)
            .reset_index(['Lat', 'Long'], drop=True)
            .stack()
            .reset_index()
            .set_index('date')
         )
    df.index = pd.to_datetime(df.index)
    df.columns = ['country', 'state', 'type', 'cases']

    # Fix South Korea
    df.loc[df.country == 'Korea, South', 'country'] = 'South Korea'

    # Move HK to country level
    is_hong_kong = df.state == 'Hong Kong'
    df.loc[is_hong_kong, 'country'] = 'Hong Kong'
    df.loc[is_hong_kong, 'state'] = np.nan

    # Aggregate large countries split by states.
    # Only `cases` is summed: summing the whole frame would also try to "sum"
    # the string column `state`, which recent pandas versions no longer drop
    # silently, and callers expect exactly country/type/cases.
    df = (df.groupby(["date", "country", "type"])[["cases"]]
          .sum()
          .reset_index()
          .sort_values(["country", "date"])
          .set_index("date"))
    return df

In [4]:
#hide

def get_countries_with_min_cases_for_df(df_cases, by='cases', min_cases = 10):
    """Return the ``country`` value of every row whose ``by`` column is at least ``min_cases``.

    Rows are ordered by ``by`` in descending order. Duplicates are kept, so a
    country appears once for each of its rows that meets the threshold.
    """
    above_threshold = df_cases[df_cases[by] >= min_cases]
    ranked = above_threshold.sort_values(by=by, ascending=False)
    return ranked['country'].values

def get_countries_with_min_cases(df_cases, by='cases', min_cases = 10):
    """Return the distinct countries having at least one row with ``by`` >= ``min_cases``.

    Countries are ordered by their largest qualifying ``by`` value, descending.
    """
    qualifies = df_cases[by] >= min_cases
    ranked = df_cases[qualifies].sort_values(by=by, ascending=False)
    return ranked['country'].unique()

def filter_cases(df_cases, by='cases', min_cases = 10, since_cases=None):
    """Keep countries that reached ``min_cases`` and count days since they hit ``since_cases``.

    Args:
        df_cases: Frame with a ``country`` column, a ``by`` column and the
            dates in its index.
        by: Column the thresholds are applied to.
        min_cases: A country is kept if any of its rows has ``by`` >= this value.
        since_cases: Threshold that defines day 0 for each country
            (100 when ``None``).

    Returns:
        Copy of the kept rows with two extra columns, ``date`` and
        ``'Days since case {since_cases}'``; rows before day 0 are dropped.

    NOTE(review): for a country that never reaches ``since_cases`` (possible
    when ``min_cases`` is smaller), ``idxmax`` over an all-False flag returns
    that country's first row, so day 0 would be its first date -- confirm
    callers always pass ``min_cases >= since_cases``.
    """
    threshold = 100 if since_cases is None else since_cases
    day_col = f'Days since case {threshold}'

    eligible = get_countries_with_min_cases_for_df(df_cases, by, min_cases)
    subset = df_cases[df_cases['country'].isin(eligible)].copy()
    subset['date'] = subset.index

    # Flag the rows at/above the threshold; per country, idxmax of that flag
    # is the first date on which the threshold was reached.
    flagged = subset.assign(F=subset[by].ge(threshold)).set_index('date')
    first_reached = flagged.groupby('country')['F'].transform('idxmax')

    subset[day_col] = (subset['date'] - first_reached.values).dt.days.values
    return subset[subset[day_col].ge(0)]

In [5]:
#hide
def get_country_colors(x):
    """Return the chart colour for country ``x``, or the default ``'#C1B7AD'`` if it is not listed."""
    default_color = '#C1B7AD'
    # Countries drawn in a colour of their own.
    distinct = {
        'Italy': 'black',
        'Iran': '#A1BA59',
        'South Korea': '#E45756',
        'Spain': '#F58518',
        'Germany': '#ab134e',
        'France': '#f5cb42',
        'US': '#2495D3',
        'Switzerland': '#9D755D',
        'United Kingdom': '#3250a8',
        'Netherlands': '#7430b3',
        'Japan': '#9467bd',
    }
    # Countries listed explicitly but sharing the default colour.
    muted = dict.fromkeys(
        ('Norway', 'Sweden', 'Belgium', 'Denmark', 'Austria'), default_color)
    palette = {**distinct, **muted}
    return palette.get(x, default_color)

In [6]:
#hide

df_deaths = load_timeseries("deaths")
df_confirmed = load_timeseries("confirmed")

# Combine both time series on (date, country). The date index repeats once
# per country, so a column-wise concat only lines up when both files list
# exactly the same countries; an explicit, validated merge does not depend on that.
df_cases = (
    df_confirmed.rename(columns={"cases": "confirmed"})
    .reset_index()
    .merge(
        df_deaths.rename(columns={"cases": "deaths"})
        .reset_index()[["date", "country", "deaths"]],
        on=["date", "country"],
        how="inner",
        validate="one_to_one",
    )
    .set_index("date")
)

# dirty ratio: confirmed cases per death (+1 death avoids dividing by zero),
# and its inverse in percent, which is what the chart plots.
df_cases['ratio'] = df_cases.confirmed / (df_cases.deaths + 1)
df_cases['ratio_in'] = 1 / df_cases['ratio'] * 100

# Daily increments, computed per country so that the last row of one country
# never leaks into the first row of the next (that row is NaN instead).
# Negative increments are flattened to 0.
# .to_numpy() assigns by position; the date index is not unique.
df_cases['new_deaths'] = (
    df_cases.groupby('country')['deaths'].diff().clip(lower=0).to_numpy()
)
df_cases['new_confirmed'] = (
    df_cases.groupby('country')['confirmed'].diff().clip(lower=0).to_numpy()
)

# NOTE(review): .round(1) is applied to the denominator, not to the ratio --
# presumably unintended; left unchanged.
df_cases['fatality'] = (df_cases['deaths'] / df_cases['confirmed'].round(1))

In [7]:
#hide
# Chart only countries with at least MIN_CASES confirmed cases, and align
# every curve on the day the country first reported SINCE_CASES cases.
MIN_CASES = 30000
SINCE_CASES = 50
dff2 = filter_cases(
    df_cases,
    by='confirmed',
    min_cases=MIN_CASES,
    since_cases=SINCE_CASES,
)

In [8]:
#hide
def make_since_chart(dff2, highlight_countries=None, baseline_countries=None, since_cases=None):
    """Build the interactive line chart of fatality rate against days since the case threshold.

    Args:
        dff2: Frame with the columns ``date``, ``country``, ``ratio_in`` and
            ``'Days since case {since_cases}'``; every column is shown in the tooltip.
        highlight_countries: Countries selected (fully opaque) when the chart loads.
        baseline_countries: Further countries selected when the chart loads.
        since_cases: Threshold used in the name of the x column; defaults to
            the notebook-level ``SINCE_CASES``.

    Returns:
        Layered Altair chart: one line per country plus a country label on
        each country's most recent point. Countries outside the legend
        selection are drawn at 10 % opacity.
    """
    # None instead of mutable list defaults; accept any iterable of names.
    preselected = list(highlight_countries or []) + list(baseline_countries or [])
    threshold = SINCE_CASES if since_cases is None else since_cases
    COL_X = f'Days since case {threshold}'

    max_date = dff2['date'].max()
    color_domain = list(dff2['country'].unique())
    color_range = list(map(get_country_colors, color_domain))

    selection = alt.selection_multi(fields=['country'], bind='legend',
                                    init=[{'country': x} for x in preselected])

    base = alt.Chart(dff2, width=550).encode(
        x=f'{COL_X}:Q',
        y=alt.Y('ratio_in:Q', axis=alt.Axis(title='Fatality rate (%)')),
        color=alt.Color('country:N', scale=alt.Scale(domain=color_domain, range=color_range)),
        tooltip=list(dff2),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
    )

    lines = base.mark_line(point=True).add_selection(selection)
    # Label each line at its latest data point only.
    labels = base.transform_filter(
        alt.datum['date'] >= int(max_date.timestamp() * 1000)
    ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='country:N')

    return (lines + labels).properties(
        title="Fatality rates per country"
    )

# Cumulative deaths per confirmed cases

The fatality rate is calculated as the cumulative number of deaths divided by the cumulative number of confirmed cases, expressed as a percentage (one death is added to the count to avoid dividing by zero).

`click` the country label that you want to highlight. `Shift` + `click` to compare different countries.

In [9]:
#hide_input
# China is left out of the chart; Iran plus four reference countries are
# pre-selected in the legend.
chart = make_since_chart(
    dff2[dff2['country'].ne("China")],
    highlight_countries=["Iran"],
    baseline_countries=['Germany', 'Italy', 'South Korea', 'US'],
)
chart