# US's failure to control the virus
> Interactive layered plot of wealthy countries' daily deaths per million

- toc: false
- comments: true
- image: images/daily_deaths.gif
- hide: false
- search_exclude: false
- categories: [interactive, NYT]
- author: Shantam Raj
- badges: true

Today we will recreate the first chart from the New York Times article [The unique US failure to control the virus](https://www.nytimes.com/2020/08/06/us/coronavirus-us.html).

![](images/daily_deaths.gif)

![](images/percent_pos_tests.gif)

![](images/reopened_w_cases.gif)

In [72]:
# Remote CSV sources used by this notebook.
# The first two point at the CSSEGISandData/COVID-19 GitHub repository
# (master branch, so their contents change over time).
# Lookup table that carries ISO codes and a Population column per location.
population_uri = 'https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv?raw=true'
# Global time series of deaths, one column per date.
deaths_ts_uri = 'https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv?raw=true'
# GDP table hosted as a gist; the URL pins a specific revision hash.
gdp_current_us_dollars = 'https://gist.githubusercontent.com/armsp/58b43f28b4bf880f3874db80630dec44/raw/959a34a1797b0e3fdc860a6ef0057c62ee898dd7/gdp.csv'

In [73]:
# pandas is imported here because this is the first cell that uses it;
# without this line a fresh "Restart & Run All" fails with NameError on `pd`.
import pandas as pd

# Deaths time series: one row per province/country, one column per date.
deaths_ts = pd.read_csv(deaths_ts_uri)
deaths_ts.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/31/20,8/1/20,8/2/20,8/3/20,8/4/20,8/5/20,8/6/20,8/7/20,8/8/20,8/9/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,1272,1283,1284,1288,1288,1294,1298,1307,1312,1312
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,157,161,166,172,176,182,188,189,193,199
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1210,1223,1231,1239,1248,1261,1273,1282,1293,1302
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,52
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,52,54,55,58,59,62,64,67,70,75


In [74]:
# GDP table; the value column is '2019 [YR2019]' and is renamed later in the pipeline.
gdp_us = pd.read_csv(filepath_or_buffer=gdp_current_us_dollars)
gdp_us.head(n=5)

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019]
0,GDP per capita (current US$),NY.GDP.PCAP.CD,Afghanistan,AFG,502.115486913067
1,GDP per capita (current US$),NY.GDP.PCAP.CD,Albania,ALB,5352.85741103671
2,GDP per capita (current US$),NY.GDP.PCAP.CD,Algeria,DZA,3948.34327892571
3,GDP per capita (current US$),NY.GDP.PCAP.CD,American Samoa,ASM,..
4,GDP per capita (current US$),NY.GDP.PCAP.CD,Andorra,AND,40886.3911648431


In [75]:
# Location lookup table; supplies the ISO codes and the Population column used below.
population = pd.read_csv(filepath_or_buffer=population_uri)
population.head(n=5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population
0,4,AF,AFG,4.0,,,,Afghanistan,33.93911,67.709953,Afghanistan,38928341.0
1,8,AL,ALB,8.0,,,,Albania,41.1533,20.1683,Albania,2877800.0
2,12,DZ,DZA,12.0,,,,Algeria,28.0339,1.6596,Algeria,43851043.0
3,20,AD,AND,20.0,,,,Andorra,42.5063,1.5218,Andorra,77265.0
4,24,AO,AGO,24.0,,,,Angola,-11.2027,17.8739,Angola,32866268.0


In [76]:
import pandas as pd
import altair as alt
from functools import wraps
import datetime as dt

def log_step(func):
    """Decorate a pipeline step so that it reports on itself after running.

    The decorated step is called with exactly the arguments it was given.
    Afterwards a single line is printed containing the step's name, the
    ``shape`` attribute of whatever it returned, and how long the call took.
    The step's return value is handed back unchanged.
    """
    @wraps(func)
    def timed_step(dataf, *args, **kwargs):
        began = dt.datetime.now()
        result = func(dataf, *args, **kwargs)
        elapsed = dt.datetime.now() - began
        print(f"After function {func.__name__} ran, shape of dataframe is - {result.shape}, execution time is - {elapsed}")
        return result
    return timed_step

@log_step
def start_pipeline(dataf):
    """Return a copy of ``dataf`` to serve as the starting point of a pipeline."""
    working_copy = dataf.copy()
    return working_copy

@log_step
def remove_cols(dataf, *arg, **kwargs):
    """Return ``dataf`` without the columns named by the positional arguments.

    Keyword arguments are accepted but not used.
    """
    unwanted = [*arg]
    return dataf.drop(columns=unwanted)

@log_step
def remove_null(dataf):
    """Return the result of ``dataf.dropna()`` (default arguments)."""
    complete_rows = dataf.dropna()
    return complete_rows

def rename_cols(dataf, *arg, **kwargs):
    """Return ``dataf`` with columns renamed according to the keyword arguments.

    Each keyword is an existing column name and its value is the new name;
    pass a ``**{...}`` dict for names that are not valid identifiers. The
    intent is short, space-free names that read well in the visualisation,
    since Altair is awkward with spaces in field names.
    Positional arguments are accepted but not used.
    """
    column_mapping = dict(kwargs)
    return dataf.rename(columns=column_mapping)

@log_step
def filter_rows(dataf, which, **kwargs):
    """Keep only the rows relevant for the named dataset.

    Parameters
    ----------
    dataf : DataFrame to filter.
    which : 'gdp' keeps rows whose 'current_us' value is not the string '..';
        'pop' keeps rows whose 'Province_State' is null.
    **kwargs : accepted but not used.

    Returns
    -------
    The filtered DataFrame.

    Raises
    ------
    ValueError
        If ``which`` is neither 'gdp' nor 'pop'. Previously this case fell
        through and returned None, which then failed inside the logging
        decorator with an unrelated AttributeError.
    """
    if which == 'gdp':
        return dataf[dataf['current_us'] != '..']
    if which == 'pop':
        return dataf[pd.isnull(dataf['Province_State'])]
    raise ValueError(f"filter_rows: unknown dataset {which!r}; expected 'gdp' or 'pop'")

def set_dtypes(dataf):
    """Return a copy of ``dataf`` whose 'current_us' column is converted to float.

    The input frame is left untouched. The previous in-place assignment
    modified the caller's object, and in the pipeline that object is a
    filtered slice, which triggers pandas' SettingWithCopyWarning.

    Raises whatever ``Series.astype(float)`` raises if a value cannot be
    converted (for example a leftover '..' placeholder).
    """
    return dataf.assign(current_us=dataf['current_us'].astype(float))

# def remove_outliers(dataf):
#     """remove outliers"""
#     return dataf

# def add_features(dataf):
#     return dataf

@log_step
def clean(dataf):
    """Turn cumulative per-region death counts into daily deaths per million per country.

    Reads two module-level frames that must already exist when this runs:
    ``pop_w_gdp`` (its 'Country/Region' column selects the countries to keep)
    and ``countries_population`` ('Country/Region' and 'Population' columns).

    Parameters
    ----------
    dataf : DataFrame with a 'Country/Region' column; every other column is
        summed per country and treated as one date of cumulative deaths.

    Returns
    -------
    DataFrame with a 'Date' column (the original date column labels) followed
    by one column per kept country. The first row is NaN for every country
    because ``diff`` has no previous row to subtract.

    Raises
    ------
    ValueError
        If a kept country has no population, or more than one population row.
        (The earlier ``int(<Series>)`` lookup failed in these cases too, and
        calling ``int`` on a Series is deprecated in recent pandas.)
    """
    # Collapse provinces/states to one row per country, then keep the selected countries.
    cumulative = dataf.groupby('Country/Region').sum()
    cumulative = cumulative[cumulative.index.isin(pop_w_gdp['Country/Region'])]

    # Build a country -> population lookup restricted to the kept countries.
    known = countries_population[countries_population['Country/Region'].isin(cumulative.index)]
    populations = known.set_index('Country/Region')['Population']
    if (populations.index.has_duplicates
            or not cumulative.index.isin(populations.index).all()
            or populations.isna().any()):
        raise ValueError("clean: every selected country needs exactly one non-null population")
    # Same order as the death columns; truncate to int as the original lookup did.
    populations = populations.reindex(cumulative.index).astype(int)

    # Dates become rows; diff converts cumulative totals into daily counts,
    # and the column-aligned division scales each country by its own population.
    daily = cumulative.T.diff()
    per_million = daily.div(populations, axis='columns') * 1000000

    return (per_million
            .rename_axis(None, axis=1)
            .reset_index()
            .rename(columns={'index': 'Date'}))


In [77]:
# GDP table: drop incomplete rows and the series-metadata columns, give the
# value column a short name, discard '..' placeholders and make it numeric.
gdp = (
    gdp_us
    .pipe(start_pipeline)
    .pipe(remove_null)
    .pipe(remove_cols, 'Series Name', 'Series Code')
    .pipe(rename_cols, **{'2019 [YR2019]': 'current_us'})
    .pipe(filter_rows, 'gdp')
    .pipe(set_dtypes)
)

# Population table: keep rows without a Province_State, drop the columns that
# are not needed, and align the key names with the GDP and deaths tables.
countries_population = (
    population
    .pipe(start_pipeline)
    .pipe(filter_rows, 'pop')
    .pipe(
        remove_cols,
        'UID', 'iso2', 'code3', 'FIPS', 'Admin2',
        'Province_State', 'Lat', 'Long_', 'Combined_Key',
    )
    .pipe(rename_cols, **{'iso3': 'Country Code', 'Country_Region': 'Country/Region'})
)

# Combine population with GDP on the shared country code
pop_w_gdp = countries_population.merge(right=gdp, on='Country Code', how='inner')

# Keep only wealthy countries, i.e. GDP value > 25000 USD and population > 10 million
is_wealthy = pop_w_gdp['current_us'] > 25000
is_populous = pop_w_gdp['Population'] > 10000000
pop_w_gdp = pop_w_gdp[is_wealthy & is_populous]

# Daily deaths per million, ready for plotting
plot_data = (
    deaths_ts
    .pipe(start_pipeline)
    .pipe(remove_cols, 'Province/State', 'Lat', 'Long')
    .pipe(clean)
    .pipe(remove_null)
)

plot_data

After function start_pipeline ran, shape of dataframe is - (269, 5), execution time is - 0:00:00.000104
After function remove_null ran, shape of dataframe is - (264, 5), execution time is - 0:00:00.001648
After function remove_cols ran, shape of dataframe is - (264, 3), execution time is - 0:00:00.001438
After function filter_rows ran, shape of dataframe is - (222, 3), execution time is - 0:00:00.000781
After function start_pipeline ran, shape of dataframe is - (4153, 12), execution time is - 0:00:00.000478
After function filter_rows ran, shape of dataframe is - (188, 12), execution time is - 0:00:00.001278
After function remove_cols ran, shape of dataframe is - (188, 3), execution time is - 0:00:00.000900
After function start_pipeline ran, shape of dataframe is - (266, 205), execution time is - 0:00:00.000276
After function remove_cols ran, shape of dataframe is - (266, 202), execution time is - 0:00:00.001868
After function clean ran, shape of dataframe is - (201, 14), execution time

Unnamed: 0,Date,Australia,Belgium,Canada,France,Germany,Italy,Japan,"Korea, South",Netherlands,Spain,Sweden,US,United Kingdom
1,1/23/20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,1/24/20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,1/25/20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,1/26/20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,1/27/20,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,8/5/20,0.314222,0.603989,0.132080,0.000000,0.190967,0.165394,0.039533,0.000000,0.175082,0.021388,1.287222,4.164311,1.988628
197,8/6/20,0.432055,0.172568,0.079248,0.168522,0.023871,0.099236,0.047440,0.019505,0.000000,0.021388,0.594102,3.794015,0.721798
198,8/7/20,0.471333,0.431421,0.105664,0.291083,0.167096,0.049618,0.063253,0.019505,0.058361,0.064165,-0.297051,3.772768,1.443597
199,8/8/20,0.667722,0.345137,0.184913,0.000000,0.071613,0.215012,0.000000,0.019505,0.233442,0.000000,0.000000,3.265888,0.810182


In [78]:
# Hover selection: picks the `key` (country) of the mark nearest to the mouse,
# starting out with 'US' selected.
# NOTE(review): `selection_single` / `add_selection` are the Altair 4 spellings;
# newer Altair releases deprecate them — confirm the pinned Altair version.
highlight = alt.selection_single(on='mouseover', empty='none', fields=['key'], nearest=True, init={'key': 'US'})

# Shared base chart. Every column after 'Date' is folded into long form
# (fields `key` and `value`), then `deaths` is computed per `key` as the mean
# of `value` over the current row and the six preceding rows.
# NOTE(review): the window has no explicit sort, so it relies on the row order
# of `plot_data` being chronological — confirm.
base = (alt.Chart(plot_data, title="Daily Deaths per million in wealthy countries")
        .transform_fold(list(plot_data.columns[1:]))
        .transform_window(deaths='mean(value)', frame=[-6,0], groupby=['key'])
        .encode(
            x=alt.X('Date:T', axis=alt.Axis(format="%b", tickCount=7), title=None),
            y=alt.Y('deaths:Q', axis=alt.Axis(tickCount=8), title=None),
            tooltip=['key:N']
        ).properties(height=600))

# Fully transparent circles whose only job is to carry the hover selection.
points = base.mark_circle().encode(
    opacity=alt.value(0)
).add_selection(highlight).properties(
    width=600
)

# One grey line per country; `detail` splits the lines by `key` without colouring them.
line = base.mark_line(stroke='grey').encode(
            #x='Date:T',
            #y='deaths:Q',
            #color='key:N',
            #color=alt.condition(alt.datum.key == 'US', alt.value('red'), alt.value('grey')),
            detail='key:N'
            #size=alt.condition(highlight, alt.value(5), alt.value(2)),
            
        )

# Thicker line drawn only for the currently highlighted country (red for US, grey otherwise).
top_layer = (base.transform_filter(highlight).mark_line()
            .encode(
                color=alt.condition(alt.datum.key == 'US', alt.value('red'), alt.value('grey')),
                size=alt.value(4)
            ))

# The US line is always drawn in red, regardless of the hover state.
usa_layer = (base.transform_filter(alt.datum.key == 'US').mark_line()
            .encode(
                color=alt.value('red'),
            ))


# End-of-series markers: x is the maximum Date and y is `deaths` at that Date.
# NOTE(review): presumably one marker per country because `key` (via the inherited
# tooltip) is the only unaggregated field — confirm. `dx` is passed to a circle
# mark here; verify it has any effect outside text marks.
circles = base.mark_circle(size=100, filled=True, strokeOpacity=0.7, fillOpacity=0.3, dx=-20).encode(
    y=alt.Y('deaths:Q', aggregate={'argmax': 'Date'}),
    x=alt.X('Date:T', aggregate='max'),
    stroke=alt.condition(alt.datum.key=='US', alt.value('red'), alt.value('grey')),
    color=alt.condition(alt.datum.key=='US', alt.value('red'), alt.value('grey')),
)

# Text labels next to the end markers, only for three named countries.
text = circles.transform_filter({'field': 'key', 'oneOf': ['US', 'United Kingdom', 'Korea, South']}).mark_text(dx=10, align='left').encode(
    text='key:N'
)
# Layer order is bottom to top: later layers are drawn over earlier ones.
(points+line+top_layer+usa_layer+circles+text).configure_view(strokeWidth=0).configure_axis(grid=True)