# COVID-19 Growth Analysis
> Visualizations of the growth of COVID-19.

- comments: true
- author: Thomas Wiecki
- categories: [growth]
- image: images/covid-growth.png
- permalink: /growth-analysis/

In [1]:
#hide
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import seaborn as sns

import requests
import io

sns.set_context('talk')
plt.style.use('seaborn-whitegrid')

In [2]:
pd.set_option('display.max_rows', 500)

In [3]:
#hide
def load_timeseries(name, 
                    base_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series'):
    import requests
    # Thanks to kasparthommen for the suggestion to directly download
    url = f'{base_url}/time_series_19-covid-{name}.csv'
    csv = requests.get(url).text
    df = pd.read_csv(io.StringIO(csv), 
                     index_col=['Country/Region', 'Province/State', 'Lat', 'Long'])
    df['type'] = name.lower()
    df.columns.name = 'date'
    
    df = (df.set_index('type', append=True)
            .reset_index(['Lat', 'Long'], drop=True)
            .stack()
            .reset_index()
            .set_index('date')
         )
    df.index = pd.to_datetime(df.index)
    df.columns = ['country', 'state', 'type', 'cases']
    
    # Move HK to country level
    df.loc[df.state =='Hong Kong', 'country'] = 'Hong Kong'
    df.loc[df.state =='Hong Kong', 'state'] = np.nan
    
    df_us = df.copy()

    obs_states = ['California', 'New York', 'Washington',
                  'Idaho', 'Missouri', 'Nebraska', 'Tennessee', 'Texas', 'Wyoming']
    df_us.loc[(df_us.state =='California') | 
           (((df_us['state'].str.replace('.', '').str[-2:]) == 'CA') & 
            (df_us['country'] == 'US')), 'country'] = 'California'
    df_us.loc[df_us.state =='California', 'state'] = np.nan

    df_us.loc[(df_us.state =='New York') | 
           (((df_us['state'].str.replace('.', '').str[-2:]) == 'NY') & 
            (df_us['country'] == 'US')), 'country'] = 'New York'
    df_us.loc[df_us.state =='New York', 'state'] = np.nan

    df_us.loc[(df_us.state =='Washington') | 
           (((df_us['state'].str.replace('.', '').str[-2:]) == 'WA') & 
            (df_us['country'] == 'US')), 'country'] = 'Washington'
    df_us.loc[df_us.state =='Washington', 'state'] = np.nan
    
    df_us.loc[(df_us.state =='Maryland') | 
           (((df_us['state'].str.replace('.', '').str[-2:]) == 'MD') & 
            (df_us['country'] == 'US')), 'country'] = 'Maryland'
    df_us.loc[df_us.state =='Maryland', 'state'] = np.nan    

    df_us.loc[(df_us.state.isin(['Idaho', 'Missouri', 'Nebraska', 'Tennessee', 'Texas', 'Wyoming'])) | 
           (((df_us['state'].str.replace('.', '').str[-2:]).isin(
               ['ID', 'MO', 'NE', 'TN', 'TX', 'WY'])) & 
            (df_us['country'] == 'US')), 'country'] = 'No mitigation US'
    df_us.loc[df_us.state =='No mitigation US', 'state'] = np.nan
    
    
    df_us = df_us[df_us['country'].isin(['California', 'New York', 'Washington',
                                         'Maryland',
                                         'No mitigation US'])].copy()
    
#     # Move CA, WA,  to country level
#     df_us = df[df['country'] == 'US'].copy()
#     df_us['last2'] = np.where(df_us['country'] == 'US', 
#                               df_us['state'].str.replace('.', '').str[-2:], 
#                               None)
#     df_us['state2'] = df_us['last2'].map(abbrev_us_state)
#     df_us['country'] = np.where(df_us['state2'].notnull(), df_us['state2'], df_us['state'])
#     df_us['state'] = np.nan
    
    # Aggregate large countries split by states
    df = pd.concat([df, 
                    (df.loc[~df.state.isna()]
                     .groupby(['country', 'date', 'type'])
                     .sum()
                     .rename(index=lambda x: x+' (total)', level=0)
                     .reset_index(level=['country', 'type'])),
                    (df_us.groupby(['country', 'date', 'type'])
                     .sum()
                     .reset_index(level=['country', 'type']))
                   ])
    return df

df_confirmed = load_timeseries('Confirmed')

TypeError: unsupported operand type(s) for -: 'str' and 'int'

In [None]:
# Estimated critical cases
p_crit = .05
df_confirmed = df_confirmed.assign(cases_crit=df_confirmed.cases*p_crit)

# Compute days relative to when 100 confirmed cases was crossed
df_confirmed.loc[:, 'days_since_100'] = np.nan
for country in df_confirmed.country.unique():
    df_confirmed.loc[(df_confirmed.country == country), 'days_since_100'] = \
        np.arange(-len(df_confirmed.loc[(df_confirmed.country == country) & (df_confirmed.cases < 100)]), 
                  len(df_confirmed.loc[(df_confirmed.country == country) & (df_confirmed.cases >= 100)]))
    
annotate_kwargs = dict(
    s='Based on COVID Data Repository by Johns Hopkins CSSE ({})'.format(df_confirmed.index.max().strftime('%B %d, %Y')), 
    xy=(0.05, 0.01), xycoords='figure fraction', fontsize=10)

In [None]:
df_confirmed.index.max()

In [None]:
#hide
# Country names seem to change quite a bit
df_confirmed.country.unique()

In [None]:
#hide
european_countries = ['Italy', 'Germany', 'France (total)', 'Spain', 'United Kingdom (total)', 
                      'Iran']
large_engl_countries = ['US (total)', 'Canada (total)', 'Australia (total)']
asian_countries = ['Singapore', 'Japan', 'Korea, South', 'Hong Kong']
south_american_countries = ['Argentina', 'Brazil', 'Colombia', 'Chile']
us_states = ['Washington', 'New York', 'California', 'No mitigation US']

# country_groups = [european_countries, large_engl_countries, asian_countries, south_american_countries, us_states]
country_groups = [european_countries, large_engl_countries, asian_countries, us_states]
line_styles = ['-', ':', '--', '-.']

In [None]:
#collapse-hide

def plot_countries(df, countries, min_cases=100, ls='-', col='cases'):
    for country in countries:
        df_country = df.loc[(df.country == country) & (df.cases >= min_cases)]
        if len(df_country) == 0:
            continue
        df_country.reset_index()[col].plot(label=country, ls=ls)
        
sns.set_palette(sns.hls_palette(8, l=.45, s=.8)) # 8 countries max
fig, ax = plt.subplots(figsize=(12, 8))

for countries, ls in zip(country_groups, line_styles):
    plot_countries(df_confirmed, countries, ls=ls)

x = np.linspace(0, plt.xlim()[1] - 1)
ax.plot(x, 100 * (1.33) ** x, ls='--', color='k', label='33% daily growth')

ax.set(yscale='log',
       title='Exponential growth of COVID-19 across countries',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Confirmed cases (log scale)')
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
ax.annotate(**annotate_kwargs)
sns.despine();

In [None]:
#hide
# This creates a preview image for the blog post and home page
fig.savefig('images/covid-growth.png')

In [None]:
#collapse-hide
fig, ax = plt.subplots(figsize=(12, 8))

for countries, ls in zip(country_groups, line_styles):
    plot_countries(df_confirmed, countries, ls=ls)

x = np.linspace(0, plt.xlim()[1] - 1)
ax.plot(x, 100 * (1.33) ** x, ls='--', color='k', label='33% daily growth')

ax.set(title='Exponential growth of COVID-19 across countries',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Confirmed cases', ylim=(0, 30000))
ax.legend(bbox_to_anchor=(1.0, 1.0))
ax.annotate(**annotate_kwargs)
sns.despine();

In [None]:
#collapse-hide
smooth_days = 4
fig, ax = plt.subplots(figsize=(14, 8))
df_confirmed['pct_change'] = (df_confirmed
                              .groupby('country')
                              .cases
                              .pct_change()
                              .rolling(smooth_days)
                              .mean()
)

for countries, ls in zip(country_groups, line_styles):
    (df_confirmed.set_index('country')
                 .loc[countries]
                 .loc[lambda x: x.cases > 100]
                 .reset_index()
                 .set_index('days_since_100')
                 .groupby('country', sort=False)['pct_change']
                 .plot(ls=ls)
    )

ax.set(ylim=(0, 1),
       xlim=(0, 20),
       title='Are we seeing changes in daily growth rate?',
       xlabel='Days from first 100 confirmed cases',
       ylabel='Daily percent change (smoothed over {} days)'.format(smooth_days),
)
ax.axhline(.33, ls='--', color='k')
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
sns.despine()
ax.annotate(**annotate_kwargs);

In [None]:
for c in ['Washington', 'New York', 'California', 'No mitigation US', 'Maryland', 'US (total)']:
    print(c)
    print(df_confirmed.loc[lambda x: (x.country == c) & (x.cases > 10)]['cases'])
    df_confirmed.loc[lambda x: (x.country == f'{c}') & (x.cases > 10)].cases.plot(title=f'Confirmed: {c}')
    plt.yscale('log')
    plt.show()

In [None]:
country = 'US (total)'

In [None]:
#### collapse-hide
sns.set_palette(sns.hls_palette(8, l=.45, s=.8)) # 8 countries max
fig, ax = plt.subplots(figsize=(12, 8))

daily_growth_rate = .35

# 94837 ICU beds in US as of 2015, 68% occupied as of 2005
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5520980/
icu_beds = 94837
icu_beds_free = (1 - .68)

df_tmp = df_confirmed.loc[lambda x: (x.country == f'{country}') & (x.cases > 100)].cases_crit
df_tmp.plot(ax=ax)

x = np.linspace(0, 30, 30)
pd.Series(index=pd.date_range(df_tmp.index[0], periods=30),
          data=100*p_crit * (1 + daily_growth_rate) ** x).plot(ax=ax,ls='--', color='k', label=f'{daily_growth_rate:.0%} daily growth')

ax.axhline(icu_beds, color='.3', ls='-.', label='Total ICU beds')
ax.axhline(icu_beds * icu_beds_free, color='.5', ls=':', label='Free ICU beds')
ax.set(yscale='log',
       title=f'When will {country} run out of ICU beds?',
       ylabel='Expected critical cases (assuming {:.0f}% critical)'.format(100 * p_crit),
)
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
sns.despine()
ax.annotate(**annotate_kwargs);

Updated daily by [GitHub Actions](https://github.com/features/actions).

This visualization was made by [Thomas Wiecki](https://twitter.com/twiecki)[^1].

[^1]:  Data sourced from ["2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE"](https://systems.jhu.edu/research/public-health/ncov/) [GitHub repository](https://github.com/CSSEGISandData/COVID-19) and recreates the (pay-walled) plot in the [Financial Times]( https://www.ft.com/content/a26fbf7e-48f8-11ea-aeb3-955839e06441). This code is provided under the [BSD-3 License](https://github.com/twiecki/covid19/blob/master/LICENSE). Link to [original notebook](https://github.com/twiecki/covid19/blob/master/covid19_growth.ipynb).