In [1]:
import json
import pandas as pd

# Parse the JSON payload; the file handle is only needed for the parse itself,
# so all DataFrame work happens after the `with` block has closed it.
with open("../data/timeseries.json") as fp:
    raw_data = json.load(fp)

# Build one weekly summary frame per country, then stack them.
# Each value of `raw_data` is passed to pd.DataFrame and must yield at least
# 'date' and 'confirmed' columns (both are read below).
dfs = []
for country, records in raw_data.items():
    df = pd.DataFrame(records)
    df['date'] = pd.to_datetime(df['date'])

    # diff() below is only meaningful on chronologically ordered rows;
    # a stable sort leaves already-ordered input untouched.
    df = df.sort_values('date', kind='stable')

    # Week index = number of Monday-aligned weeks since the first week present
    # for this country. Within one year this equals "ISO week number minus the
    # minimum ISO week number", but unlike the week-of-year number it keeps
    # increasing across a year boundary instead of wrapping back to 1.
    week_start = df['date'].dt.normalize() - pd.to_timedelta(df['date'].dt.weekday, unit='D')
    df['week'] = (week_start - week_start.min()).dt.days // 7

    # Day-over-day change in 'confirmed'; the first row has no predecessor
    # and is counted as 0 new cases.
    df['new'] = df['confirmed'].diff().fillna(0)

    # Per week: highest 'confirmed' value seen and the sum of daily changes.
    df = df.groupby('week').agg(confirmed=('confirmed', 'max'), new=('new', 'sum')).reset_index()
    df['country'] = country

    # Keep only weeks with more than 10 confirmed and more than 10 new cases
    # (both columns are later plotted on log scales).
    df = df[(df['confirmed'] > 10) & (df['new'] > 10)]
    dfs.append(df)

# Single long-format frame with columns: week, confirmed, new, country.
raw_dfs = pd.concat(dfs)

In [2]:
# Display the combined weekly per-country frame (columns: week, confirmed, new, country).
raw_dfs

Unnamed: 0,week,confirmed,new,country
7,7,16,12.0,Afghanistan
8,8,40,24.0,Afghanistan
9,9,120,80.0,Afghanistan
7,7,42,42.0,Albania
8,8,89,47.0,Albania
...,...,...,...,...
7,7,38,22.0,West Bank and Gaza
8,8,52,14.0,West Bank and Gaza
9,9,109,57.0,West Bank and Gaza
9,9,18,18.0,Mali


In [3]:
import altair as alt

In [4]:
# Highest weekly 'confirmed' value per country, used to pick which countries to plot.
totals = raw_dfs.groupby('country').agg(total=('confirmed', 'max'))

# Plot every country whose total exceeds 3000, plus Cuba regardless of its total.
# (Alternatives tried: a fixed hand-picked list of countries, or every key in raw_data.)
countries = totals[totals['total'] > 3000].index.tolist() + ['Cuba']
data = raw_dfs[raw_dfs['country'].isin(countries)]

# Encodings shared by both layers: confirmed vs. new cases, both on log axes,
# one colour per country.
base = alt.Chart(data).encode(
    x=alt.X('confirmed', scale=alt.Scale(type='log')),
    y=alt.Y('new', scale=alt.Scale(type='log')),
    color='country',
)

# The line layer carries the country tooltip; the point layer marks each weekly sample.
lines = base.mark_line().encode(tooltip='country')
points = base.mark_point()
chart = lines + points

chart.properties(width=700, height=700).interactive()