In [1]:
import os
import glob
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.express as px
from plotly.offline import plot
import plotly.graph_objects as go

In [97]:
# Load every IHME forecast CSV found in `path` into a dict of DataFrames.
# Keys have the form 'MM-DD': the day is read from the first two characters of
# the file name and the month from the next two.
path = '/Users/andrewjpeters/Documents/GitHub/ihme/ihme_data'
extension = 'csv'
df_dict = {}

# The working directory is switched to the data folder so the glob pattern
# below can be relative; relative paths used afterwards resolve here as well.
os.chdir(path)
result = glob.glob('*.' + extension)
for file in result:
    day, month = file[:2], file[2:4]
    df_dict['{}-{}'.format(month, day)] = pd.read_csv(path + '/' + file)

In [98]:
# Stack every forecast release into one long table with a common schema.
#
# Each entry of df_dict is keyed by 'MM-DD'; the year is fixed to 2020 here.
# The release date is stored as an ISO string in the 'version' column.
# Some releases name their columns 'date_reported' / 'location_name'; those
# are copied into 'date' / 'location' so all releases share the same columns.
#
# The parts are collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0, and growing a frame inside a loop re-copies all
# previously appended rows on every iteration.
output_columns = ['location', 'deaths_lower', 'deaths_mean', 'deaths_upper', 'date', 'version']
version_parts = []

for key, df_part in df_dict.items():
    date = datetime.strptime(key + '-2020', '%m-%d-%Y')
    df_part['version'] = date.strftime('%Y-%m-%d')
    if 'date_reported' in df_part.columns:
        df_part['date'] = df_part['date_reported']
    if 'location_name' in df_part.columns:
        df_part['location'] = df_part['location_name']
    df_part = df_part.sort_values('date')
    version_parts.append(df_part[output_columns])

# pd.concat raises ValueError("No objects to concatenate") if no CSV was loaded.
df = pd.concat(version_parts, ignore_index=True)

df = df.sort_values(['version', 'date'])

In [106]:
# Keep only the national-level rows. The explicit copy makes df_usa an
# independent frame, so the cumulative columns that are added to it further
# down do not trigger pandas' SettingWithCopyWarning (assignment on a slice
# of df).
df_usa = df[df['location'] == 'United States of America'].copy()
df_usa.head()

Unnamed: 0,location,deaths_lower,deaths_mean,deaths_upper,date,version
89635,United States of America,0.0,0.0,0.0,2020-02-06,2020-03-26
89717,United States of America,0.0,0.0,0.0,2020-02-07,2020-03-26
89787,United States of America,0.0,0.0,0.0,2020-02-08,2020-03-26
89798,United States of America,0.0,0.0,0.0,2020-02-09,2020-03-26
89900,United States of America,0.0,0.0,0.0,2020-02-10,2020-03-26


In [107]:
# Turn the daily death columns into running totals, restarting the sum for
# every forecast release ('version').
#
# groupby(...).cumsum() returns values aligned to df_usa's own index, so the
# result no longer depends on df_usa happening to be ordered version by
# version (the previous list-based approach re-attached values positionally).
# assign() builds a new frame instead of writing into a slice of df, and it
# overwrites the columns if this cell is re-run.
death_columns = ['deaths_lower', 'deaths_mean', 'deaths_upper']
cumulative = df_usa.groupby('version', sort=False)[death_columns].cumsum()

df_usa = df_usa.assign(
    **{'cumulative_' + column: cumulative[column] for column in death_columns}
)

In [102]:
# Load the ECDC table and keep the United States rows dated from 2020-03-26
# (inclusive) up to 2020-07-01 (exclusive).
# NOTE(review): the date bounds are compared against string literals, which
# presumably relies on 'date' holding ISO 'YYYY-MM-DD' strings -- confirm.
ecdc_df = pd.read_csv('/Users/andrewjpeters/Documents/GitHub/ihme/ecdc_data.csv')

is_usa = ecdc_df['location'] == 'United States'
in_window = (ecdc_df['date'] >= '2020-03-26') & (ecdc_df['date'] < '2020-07-01')

ecdc_df = ecdc_df[is_usa & in_window]

In [108]:
# For every date in the ECDC series, look up the most recent IHME release
# published on or before that date and collect that release's whole
# cumulative forecast. 'valid_until' tags each collected row with the ECDC
# date it was selected for.
cumulative_deaths_lower = []
cumulative_deaths_mean = []
cumulative_deaths_upper = []
date_list = []
valid_until = []

# Sorted once, outside the loop, so "latest release" does not depend on the
# row order of df_usa. Versions are 'YYYY-MM-DD' strings, so lexical order is
# chronological order.
available_versions = sorted(df_usa['version'].unique())

for date in ecdc_df['date'].unique():
    eligible_versions = [v for v in available_versions if v <= date]
    if not eligible_versions:
        raise IndexError(
            'no IHME forecast version was released on or before ' + str(date)
        )
    version = eligible_versions[-1]

    # Filter once per date and reuse the result for every column.
    forecast = df_usa[df_usa['version'] == version]

    cumulative_deaths_lower.extend(forecast['cumulative_deaths_lower'])
    cumulative_deaths_mean.extend(forecast['cumulative_deaths_mean'])
    cumulative_deaths_upper.extend(forecast['cumulative_deaths_upper'])
    date_list.extend(forecast['date'])
    valid_until.extend([date] * len(forecast))

In [109]:
# Assemble the collected lists into one frame: one row per
# (forecast date, ECDC date the forecast was selected for).
filled_df = pd.DataFrame({
    'cumulative_deaths_lower': cumulative_deaths_lower,
    'cumulative_deaths_mean': cumulative_deaths_mean,
    'cumulative_deaths_upper': cumulative_deaths_upper,
    'date_list': date_list,
    'valid_until': valid_until,
})

In [111]:
# Show the last five rows as a quick sanity check.
filled_df.tail(5)

Unnamed: 0,cumulative_deaths_lower,cumulative_deaths_mean,cumulative_deaths_upper,date_list,valid_until
9159,94428.05,134475.185,242789.775,2020-08-20,2020-05-09
9160,94428.05,134475.185,242789.775,2020-08-21,2020-05-09
9161,94428.05,134475.185,242789.775,2020-08-22,2020-05-09
9162,94428.05,134475.185,242789.775,2020-08-23,2020-05-09
9163,94428.05,134475.185,242789.775,2020-08-24,2020-05-09


In [125]:
# Initial traces of the animated figure. The three forecast traces and the
# solid "actual" trace start with only the first two points of their data.

# IHME mean projection.
trace1 = go.Scatter(
    x=filled_df['date_list'].values[:2],
    y=filled_df['cumulative_deaths_mean'].values[:2],
    mode='lines',
    name='IHME projected deaths',
    line={'color': 'rgba(0, 63, 92, 0.80)', 'width': 4},
)

# Upper bound of the projection; no fill on this trace.
trace2 = go.Scatter(
    x=filled_df['date_list'].values[:2],
    y=filled_df['cumulative_deaths_upper'].values[:2],
    fill=None,
    mode='lines',
    name='IHME projected deaths (upper)',
    showlegend=False,
    line={'color': 'rgba(0, 63, 92, 0.30)', 'width': 4},
)

# Lower bound; 'tonexty' fills the area between this trace and the previous one.
trace3 = go.Scatter(
    x=filled_df['date_list'].values[:2],
    y=filled_df['cumulative_deaths_lower'].values[:2],
    fill='tonexty',
    fillcolor='rgba(0, 63, 92, 0.10)',
    mode='lines',
    name='IHME projected deaths (lower)',
    showlegend=False,
    line={'color': 'rgba(0, 63, 92, 0.30)', 'width': 4},
)

# Full ECDC series as a thin dotted line.
trace4 = go.Scatter(
    x=ecdc_df['date'],
    y=ecdc_df['total_deaths'],
    mode='lines',
    showlegend=False,
    line={'color': 'rgba(214, 0, 3, 0.5)', 'width': 2, 'dash': 'dot'},
)

# Solid ECDC line, starting with the first two points only.
trace5 = go.Scatter(
    x=ecdc_df['date'][:2],
    y=ecdc_df['total_deaths'][:2],
    mode='lines',
    name='Actual Deaths',
    line={'color': 'rgba(214, 0, 3, 0.75)', 'width': 4},
)

# Text-mode trace at a fixed position; it carries no text initially.
trace6 = go.Scatter(
    x=['2020-04-10'],
    y=[100000],
    mode='text',
    showlegend=False,
    line={'color': 'rgba(0, 63, 92, 0.80)', 'width': 4},
)

def _animation_frame(k, date):
    """Build the animation frame for the k-th ECDC date.

    Parameters
    ----------
    k : int
        Zero-based position of `date` in ecdc_df.date; the solid "actual"
        line is drawn through the first k + 1 ECDC points.
    date : str
        ECDC date in 'YYYY-MM-DD' form, matched against
        filled_df['valid_until'] to pick the forecast rows for this frame.

    Returns
    -------
    dict
        A frame specification with six data entries, in the order given by
        its 'traces' list (mean, upper, lower, full actual, partial actual,
        text label).
    """
    # Select this frame's forecast rows once and reuse them for all three
    # forecast traces instead of re-evaluating the same mask per x/y argument.
    forecast = filled_df[filled_df['valid_until'] == date]
    forecast_dates = forecast['date_list']
    label_date = datetime.strptime(date, '%Y-%m-%d').strftime('%b %d')

    return dict(
        data=[
            dict(type='scatter',
                 x=forecast_dates,
                 y=forecast['cumulative_deaths_mean']),
            dict(type='scatter',
                 x=forecast_dates,
                 y=forecast['cumulative_deaths_upper']),
            dict(type='scatter',
                 x=forecast_dates,
                 y=forecast['cumulative_deaths_lower']),
            dict(type='scatter',
                 x=ecdc_df.date,
                 y=ecdc_df.total_deaths),
            dict(type='scatter',
                 x=ecdc_df.date[:k+1],
                 y=ecdc_df.total_deaths[:k+1]),
            dict(type='scatter',
                 x=['2020-04-10'],
                 y=[100000],
                 mode='text',
                 textposition='top center',
                 textfont_size=14,
                 text=['Actual Deaths and <br> IHME model as of ' + label_date]),
        ],
        # frame['data'][i] updates the figure trace at index traces[i].
        traces=[0, 1, 2, 3, 4, 5],
    )


frames = [_animation_frame(k, date) for k, date in enumerate(ecdc_df.date)]

# "Play" button: animates through all frames starting from the current one,
# with a 3 ms frame duration, no transition and no full redraw.
play_button = dict(
    label='Play',
    method='animate',
    args=[
        None,
        dict(
            frame=dict(duration=3, redraw=False),
            transition=dict(duration=0),
            fromcurrent=True,
            mode='immediate',
        ),
    ],
)

# Both axes use fixed ranges (autorange off).
layout = go.Layout(
    showlegend=True,
    hovermode='closest',
    updatemenus=[dict(type='buttons', showactive=False, buttons=[play_button])],
    xaxis=dict(range=['2020-03-20', '2020-06-21'], autorange=False),
    yaxis=dict(range=[0, 140000], autorange=False),
)
# Trace order matters: each frame addresses the traces by index 0-5.
initial_traces = [trace1, trace2, trace3, trace4, trace5, trace6]
fig = go.Figure(data=initial_traces, layout=layout, frames=frames)

# Export as a standalone HTML file; the relative path resolves against the
# current working directory.
fig.write_html("graph.html")