In [1]:
# %load_ext autoreload
# %autoreload 2

import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import requests
import pandas as pd
import altair as alt

# When the kernel starts inside the `notebooks/` folder, move up one level so
# the working directory is the project root (presumably so that the local
# modules imported in later cells -- fetch, charts, render -- resolve; confirm).
# `os.path.basename` is used instead of splitting on '/' so the check also
# works on platforms whose path separator is not '/'.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')

# Plot styling for matplotlib and Altair.
plt.style.use('ggplot')
alt.themes.enable('fivethirtyeight')

# Directory for rendered chart partials. The path is relative, so it is
# resolved against the working directory chosen above.
CHARTS_DIR = Path('../covid19-analysis/layouts/partials/covid')

ImportError: No module named pathlib

In [2]:
from fetch import fetch_timeseries, TS_URL

# Wide frame: one row per (country, date), one column per status.
df = fetch_timeseries(TS_URL)

# Long form: stack the status columns into a single Series named 'count' and
# label the new (previously unnamed) innermost index level 'status'.
df_long = (
    df
    .stack()
    .rename('count')
    .rename_axis(index={None: 'status'})
)

display(df.head(), df_long.head())

# Timeseries

Unnamed: 0_level_0,Unnamed: 1_level_0,confirmed,deaths,recovered
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2020-01-22,0,0,0
Afghanistan,2020-01-23,0,0,0
Afghanistan,2020-01-24,0,0,0
Afghanistan,2020-01-25,0,0,0
Afghanistan,2020-01-26,0,0,0


country      date        status   
Afghanistan  2020-01-22  confirmed    0
                         deaths       0
                         recovered    0
             2020-01-23  confirmed    0
                         deaths       0
Name: count, dtype: int64

In [3]:
# NOTE(review): `display` is imported here but is already called in the
# previous cell; IPython exposes it as a builtin, so this import is only
# needed when the code runs outside a notebook kernel.
from IPython.display import display
from importlib import reload
import charts
import render
# Re-execute the local modules so edits made on disk are picked up without
# restarting the kernel. This must happen BEFORE the `from ... import` lines
# below, otherwise those names would still be bound to the stale objects.
reload(charts)
reload(render)
# NOTE(review): the star import hides where the helpers used in later cells
# come from (presumably make_data_long, make_dod, make_map, countries,
# status_schemes, ... -- confirm) and makes the namespace order-dependent;
# consider listing the names explicitly.
from charts import *
# NOTE(review): `make_chart` is not used in any of the visible cells.
from render import make_chart

# alt.data_transformers.enable('default', max_rows=None)
# Use the 'data_server' transformer for chart data instead of the default one
# (the alternative kept above lifts the default transformer's row limit).
alt.data_transformers.enable('data_server')

# Chart inputs derived from the long-format series. `make_data_long` and
# `make_dod` come from the star import of `charts`; their exact output schema
# is not visible here.
data_long = make_data_long(df_long)
dod_long = make_dod(df_long).reset_index()

# Time-series panel: shared base chart with a temporal x axis, plus the
# legend/tooltip selections that are also handed to the combined chart.
base_ts = alt.Chart(data_long).encode(x='date:T')
selection_legend, selection_tooltip = make_ts_selections()
ts_chart = make_ts_chart(
    base_ts,
    sorted(dod_long.status.unique()),
    selection_legend,
    selection_tooltip,
)

# Map panel.
map_data = make_map_data(data_long, countries)
map_chart = make_map(map_data, status_schemes)

# Panel built from the `make_dod` output, then the combined dashboard.
dod_chart = make_dod_chart(dod_long)
chart = combine_map_ts(
    map_chart,
    ts_chart,
    dod_chart,
    selection_legend,
)

chart

In [4]:
# Wide view of the map data: one row per (country, id, day, date) with the
# `count` of each status spread into its own column.
(
    map_data
    .set_index(['country', 'id', 'day', 'date', 'status'])['count']
    .unstack()
    .reset_index()
)

status,country,id,day,date,confirmed,deaths,recovered
0,Afghanistan,4,95,2020-04-04,299.0,7.0,10.0
1,Albania,8,95,2020-04-04,333.0,20.0,99.0
2,Algeria,12,95,2020-04-04,1251.0,130.0,90.0
3,Andorra,20,95,2020-04-04,466.0,17.0,21.0
4,Angola,24,95,2020-04-04,10.0,2.0,2.0
...,...,...,...,...,...,...,...
171,Uzbekistan,860,95,2020-04-04,266.0,2.0,25.0
172,Venezuela,862,95,2020-04-04,155.0,7.0,52.0
173,Vietnam,704,95,2020-04-04,240.0,,90.0
174,Zambia,894,95,2020-04-04,39.0,1.0,2.0


In [5]:
# Long-format map data: one row per (country, status) with its count, the
# numeric country id, the colour scheme name and the day number.
map_data

Unnamed: 0,country,status,date,count,id,scheme,day
0,Afghanistan,confirmed,2020-04-04,299.0,4,reds,95
1,Afghanistan,deaths,2020-04-04,7.0,4,greys,95
2,Afghanistan,recovered,2020-04-04,10.0,4,greens,95
3,Albania,confirmed,2020-04-04,333.0,8,reds,95
4,Albania,deaths,2020-04-04,20.0,8,greys,95
...,...,...,...,...,...,...,...
441,Zambia,confirmed,2020-04-04,39.0,894,reds,95
442,Zambia,deaths,2020-04-04,1.0,894,greys,95
443,Zambia,recovered,2020-04-04,2.0,894,greens,95
444,Zimbabwe,confirmed,2020-04-04,9.0,716,reds,95


## Attempt: day slider in heatmap

In [6]:
# NOTE(review): `data` is not imported in any visible cell -- presumably it is
# `vega_datasets.data` arriving through `from charts import *`; confirm.
countries = alt.topo_feature(data.world_110m.url, 'countries')

# Confirmed counts for China only, reshaped to one column per day so the
# values can be attached to the map geometry with a lookup on `id`.
china_data = map_data.query('country == "China"').query('status == "confirmed"')
min_day = china_data.day.min()
max_day = china_data.day.max()
china_data = china_data.set_index(['id', 'day'])['count'].unstack().reset_index()
# Column labels must be strings to be usable as Vega-Lite field names.
china_data.columns = china_data.columns.map(str)
columns = list(china_data.columns.difference({'id'}))

# Slider over the available day range, bound to a single-value selection on
# the `day` field and initialised at the first day.
slider = alt.binding_range(min=min_day, max=max_day, step=1)
select_day = alt.selection_single(
    name='day',
    fields=['day'],
    bind=slider,
    init={'day': min_day},
)

# NOTE(review): 'country' is not among the looked-up fields, so the tooltip
# entry for it will presumably be empty -- confirm.
china_map = (
    alt.Chart(countries)
    .mark_geoshape()
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=china_data, key='id', fields=columns),
    )
    # Back to long form: one (day, count) record per shape and day column.
    .transform_fold(columns, as_=['day', 'count'])
    # The fold emits the day as the *string* column name, while the slider
    # selection holds a number. The selection test compares strictly, so
    # without this cast the filter below would never match any record.
    .transform_calculate(day='parseInt(datum.day)')
    .encode(
        color=alt.Color('count:Q', scale=alt.Scale(scheme='reds')),
        tooltip=['count:Q', 'country:N', 'day:Q'],
    )
    .add_selection(select_day)
    .transform_filter(select_day)
)

china_map

In [7]:
# Inspect the pivoted frame: `id` plus one column per day.
china_data

day,id,95
0,156,82543.0


In [8]:
# Day columns that are looked up and folded in the map chart.
columns

['95']

In [9]:
# Check that the column labels are strings after the `map(str)` conversion.
china_data.columns

Index(['id', '95'], dtype='object', name='day')

# Correlations

In [10]:
import seaborn as sns

# Forward-fill gaps within each country (never across countries), zero-fill
# whatever is still missing, then plot the correlation matrix of the status
# columns. `GroupBy.ffill()` replaces the deprecated `fillna(method='ffill')`
# inside a per-group `apply`; the filled values are the same, and row order /
# index layout do not affect `corr()`.
(
    df
    .groupby('country')
    .ffill()
    .fillna(0)
    .corr()
    .pipe(sns.heatmap, annot=True)
)

<matplotlib.axes._subplots.AxesSubplot object at 0x7fb047787358>

In [11]:
# Correlation between the status columns after summing all countries per date.
df.groupby('date').sum().corr()

Unnamed: 0,confirmed,deaths,recovered
confirmed,1.0,0.996935,0.959665
deaths,0.996935,1.0,0.946758
recovered,0.959665,0.946758,1.0


In [12]:
def correlation_lags(df, column='deaths', max_lag=20, group=False, tail_trim=20):
    """Find the lag at which shifted ``confirmed`` correlates best with ``column``.

    For every lag ``t`` in ``range(max_lag)`` the ``confirmed`` column is
    shifted forward by ``t`` rows, its last ``tail_trim`` rows are dropped, and
    the Pearson correlation with ``column`` is computed (pandas aligns the two
    series on their index and ignores NaN pairs). The lag with the highest
    correlation is returned.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``confirmed`` column and ``column``. When ``group`` is
        true it must also be groupable by ``'country'``.
    column : str
        Column correlated against the shifted ``confirmed`` series.
    max_lag : int
        Lags ``0 .. max_lag - 1`` are evaluated.
    group : bool
        If true, compute the best lag separately for each country.
    tail_trim : int
        Number of trailing rows removed from the shifted series before
        correlating. The default of 20 reproduces the previously hard-coded
        value.

    Returns
    -------
    The best lag (a label from ``range(max_lag)``), or a Series of best lags
    indexed by country when ``group`` is true. Where no correlation is defined
    (too few rows, constant series) the result is NaN rather than relying on
    ``idxmax`` with all-NA input, which warns or raises in recent pandas.
    """
    def series_corr(f):
        # Number of leading rows kept after trimming; clamped at zero so a
        # frame shorter than `tail_trim` yields an empty (all-NaN) comparison
        # instead of a wrapped-around negative slice.
        keep = max(len(f) - tail_trim, 0)
        target = f[column]
        return pd.Series({t: f['confirmed'].shift(t).iloc[:keep].corr(target)
                          for t in range(max_lag)})

    if group:
        corrs = df.groupby('country').apply(series_corr)
        # Take idxmax only over rows that have at least one valid correlation,
        # then restore the full country index (missing rows become NaN).
        return corrs.dropna(how='all').idxmax(axis=1).reindex(corrs.index)

    corrs = df.pipe(series_corr)
    if corrs.isna().all():
        return np.nan
    return corrs.idxmax()

# World totals per date, then the best-correlating lag between confirmed
# cases and each outcome column.
world_ts = df.groupby('date').sum()
days_to_death, days_to_recov = (
    correlation_lags(world_ts, outcome) for outcome in ('deaths', 'recovered')
)

In [13]:
# Best-correlating lags (in rows of the per-date world series) for deaths and
# recoveries respectively.
days_to_death, days_to_recov

(0, 19)

In [14]:
def _lagged_fatality_rate(g):
    """Deaths divided by the confirmed count `days_to_death` rows earlier."""
    lag = g['days_to_death'].iloc[0]
    # A zero denominator would produce +/-inf (deaths > 0) and a single inf
    # poisons the per-date mean below; mask zeros so those rows become NaN
    # and are skipped by `mean()`.
    lagged_confirmed = g.confirmed.shift(lag).replace(0, np.nan)
    return g.deaths.div(lagged_confirmed)


# Per-country lag between confirmed cases and deaths; countries without a
# defined lag fall back to 0.
lag_by_country = (
    correlation_lags(df, group=True, column='deaths')
    .rename('days_to_death')
    .fillna(0)
    .astype(int)
)

fatality_rates = (
    df.join(lag_by_country)
    .groupby('country')
    .apply(_lagged_fatality_rate)
    .reset_index(0, drop=True)
)
# Average the per-country rates for each date.
fatality_rates.groupby('date').mean().plot()

<matplotlib.axes._subplots.AxesSubplot object at 0x7fb047787358>

In [15]:
# World-level fatality rate: deaths over the confirmed count `days_to_death`
# rows earlier.
(world_ts.deaths / world_ts.confirmed.shift(days_to_death)).plot()

<matplotlib.axes._subplots.AxesSubplot object at 0x7fb047787358>