In [1]:
%load_ext autoreload
%autoreload 2

#standard imports
import h5py
import numpy as np
import os
import pandas as pd
import pickle
import torch as th
import yaml

from datetime import timedelta
from pathlib import Path

# bokeh
from bokeh.io import output_notebook, export_png, export_svgs
from bokeh.layouts import gridplot
from bokeh.models import HoverTool, Title, FactorRange, LinearAxis, Legend, Band, Range1d
from bokeh.palettes import Blues
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.transform import factor_cmap
#from bokeh.sampledata.us_counties import data as counties
output_notebook()

# lib
import sys
sys.path.append('../')
from metrics import compute_metrics, _compute_metrics
from analysis import load_backfill, plot_cases, plot_metric_for_dates

In [8]:
# Region under analysis: display name and the short code used in file names.
region = 'New York'
region_short = 'ny'
# case_type[0] selects the ground-truth CSV, case_type[1] the directory of the
# external forecast CSVs.
case_type = ('cases', 'infections')
# Derive the file name from region_short so it follows the region settings
# above instead of repeating the literal 'ny'.
f_ground_truth = f'../data/usa/data_{case_type[0]}_{region_short}.csv'
# External forecasts to compare against, keyed by display name.
# Each value is (path template, colour, line style) -- presumably consumed by
# plot_metric_for_dates; confirm in analysis.py. The doubled braces leave a
# literal `{}` placeholder in the path template.
other_forecasts = {
    #'YYG': (f'/checkpoint/mattle/covid19/csvs/{case_type[1]}/yyg/counts_{{}}.csv', '#f29111', 'solid'),
    'Los Alamos': (f'/checkpoint/mattle/covid19/csvs/{case_type[1]}/los_alamos/counts_{{}}.csv', "#009ed7", 'solid')
}

def aggregate_region(df, name=None):
    """Sum all columns of ``df`` row-wise into a single-column DataFrame.

    Args:
        df: DataFrame whose columns are added together for every row.
        name: Label of the resulting column. When omitted, the notebook-level
            ``region`` setting is used, so one-argument calls (for example as
            an ``f_aggr`` callback) behave exactly as before.

    Returns:
        A one-column DataFrame with the same index as ``df``.
    """
    if name is None:
        # Looked up at call time so a changed notebook setting is picked up.
        name = region
    return df.sum(axis=1).to_frame(name=name)

### Progression of Cases

In [9]:
# Load ground truth data. The CSV is indexed by region, so transpose it to get
# one row per date and one column per region.
df_region = pd.read_csv(f_ground_truth, index_col='region').transpose()
# Parse the date labels and name the index in one step, without in-place
# mutation of the index object.
df_region.index = pd.to_datetime(df_region.index).rename('date')
print('Days = {}, Regions = {}'.format(*df_region.shape))
display(df_region.tail())

# plot cases over time
print(df_region.shape)
# The title follows case_type: the file loaded above is the cases file, so the
# plot must not be labelled "Deaths".
p = plot_cases(df_region, f"{case_type[0].capitalize()} in {region}", show_hover=False)
show(p)

# Region-wide totals (sum over all columns), used for the comparisons below.
df_aggr = aggregate_region(df_region)

Days = 161, Regions = 62


region,"Albany, New York","Allegany, New York","Bronx, New York","Broome, New York","Cattaraugus, New York","Cayuga, New York","Chautauqua, New York","Chemung, New York","Chenango, New York","Clinton, New York",...,"Sullivan, New York","Tioga, New York","Tompkins, New York","Ulster, New York","Warren, New York","Washington, New York","Wayne, New York","Westchester, New York","Wyoming, New York","Yates, New York"
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-06-25,2076.0,58.0,47294.0,680.0,121.0,109.0,121.0,139.0,144.0,101.0,...,1448.0,141.0,175.0,1765.0,263.0,245.0,154.0,34642.0,93.0,44.0
2020-06-26,2084.0,58.0,47383.0,697.0,123.0,109.0,122.0,139.0,145.0,101.0,...,1449.0,141.0,175.0,1768.0,263.0,245.0,159.0,34700.0,94.0,45.0
2020-06-27,2091.0,58.0,47456.0,701.0,123.0,111.0,123.0,139.0,146.0,101.0,...,1451.0,142.0,175.0,1773.0,263.0,245.0,161.0,34748.0,95.0,45.0
2020-06-28,2097.0,59.0,47514.0,703.0,123.0,113.0,123.0,140.0,146.0,101.0,...,1451.0,142.0,176.0,1778.0,263.0,246.0,162.0,34780.0,95.0,45.0
2020-06-29,2099.0,59.0,47555.0,710.0,123.0,114.0,123.0,140.0,146.0,101.0,...,1451.0,142.0,177.0,1778.0,263.0,246.0,165.0,34798.0,95.0,46.0


(161, 62)


### Load Backfill and Configs

In [49]:
# Backfill job to analyse. Earlier runs that were assigned here and then
# immediately overwritten, kept for reference:
#   nystate/2020_06_29_16_11, nystate/2020_06_29_17_00,
#   nystate/2020_06_29_18_38, nystate/2020_06_30_07_16
job = "nystate/2020_06_30_18_11_34"

#fs, cfgs = load_backfill(job, model='ar', indicator="*_forecast.csv", forecast="final_model_best_mae_forecast.csv") 
# fs is indexed by forecast date and yields forecast CSV paths (see plot_error);
# cfgs is a DataFrame of configs indexed by date.
fs, cfgs = load_backfill(job, model='car', forecast="best_rmse")
# Display the configs without the 'fdat', 'fpop' and 'job' columns.
cfgs.drop(columns=['fdat', 'fpop', 'job'])

Unnamed: 0_level_0,activation,decay,eta,granger,loss,lr,momentum,niters,t0,test_on,timger,weight_decay,window
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-04-15,sigmoid,latent2_2,0.1,0.5,nb,0.001,0.99,30000,0,21,10,0.1,20
2020-04-19,sigmoid,latent2_2,0.3,0.5,nb,0.001,0.99,30000,0,21,1,0.2,20
2020-04-22,sigmoid,latent2_2,0.2,0.5,nb,0.001,0.99,30000,0,21,1,0.2,20
2020-04-26,sigmoid,latent2_2,0.3,0.5,nb,0.001,0.99,30000,0,21,1,0.1,20
2020-05-06,sigmoid,latent2_2,0.2,0.5,nb,0.001,0.99,30000,0,21,1,0.2,20
2020-05-10,sigmoid,latent2_2,0.2,0.5,nb,0.001,0.99,30000,0,21,20,0.2,20


### Forecast Comparison

Compare our forecasts to the forecasts published by:
- [Los Alamos National Laboratory](https://covid-19.bsvgateway.org/)
- [YYG](https://covid19-projections.com)

In [50]:
# Build the 'MAE' plots for the dates in cfgs.index, passing the aggregated
# ground truth, the external forecasts and the aggregation callback.
# NOTE(review): presumably one figure per date -- confirm in analysis.py.
ps = plot_metric_for_dates(fs, df_aggr, cfgs.index, 'MAE', others=other_forecasts, f_aggr=aggregate_region)
# Arrange the plots two per row.
grid = gridplot(ps, ncols=2, plot_width=430)
show(grid)
# Also export the grid as SVG under /tmp; the return value is discarded.
_ = export_svgs(grid, filename=f'/tmp/{region_short}_mae.svg')

In [53]:
def plot_error(fs, gt, date, title, regions=None, height=400, width=600, backend='svg'):
    """Plot forecast-minus-ground-truth error curves over time.

    Args:
        fs: Container indexed by ``date`` that yields the path of a forecast
            CSV with a ``date`` column.
        gt: Ground-truth DataFrame indexed by date.
        date: Key into ``fs`` selecting the forecast to evaluate.
        title: Figure title.
        regions: Columns to draw; defaults to every column of the error frame.
        height: Figure height.
        width: Figure width.
        backend: Value assigned to the figure's ``output_backend``.

    Returns:
        The bokeh figure with one line per plotted column.
    """
    forecast = pd.read_csv(fs[date], index_col='date', parse_dates=['date'])

    # Only dates present in both the forecast and the ground truth are compared.
    shared_dates = np.intersect1d(
        pd.to_datetime(forecast.index),
        pd.to_datetime(gt.index),
    )
    truth = gt.loc[shared_dates]
    error = forecast.loc[shared_dates] - truth
    error_source = ColumnDataSource(error)

    fig = figure(
        title=title,
        x_axis_type="datetime",
        x_axis_label="Day",
        y_axis_label="Error",
        plot_width=width,
        plot_height=height,
        tools="save,hover",
        tooltips=[("State","$name"), ("Error", "$y")],
    )

    columns = error.columns if regions is None else regions
    for column in columns:
        # `name` feeds the "$name" field of the hover tooltip.
        fig.line(
            x="date",
            y=column,
            source=error_source,
            name=column,
            color="#009ed7",
            alpha=0.5,
            line_width=2,
        )

    fig.output_backend = backend
    return fig

# Error of the 2020-04-15 forecast against the un-aggregated ground truth
# (df_region). Note that this rebinds the notebook-level name `p`.
p = plot_error(fs, df_region, '2020-04-15', f'Prediction Error - {region}')
show(p)    

In [83]:
# Side-by-side comparison of the aggregated 2020-04-22 forecast and the
# aggregated ground truth on the forecast's dates.
# `aggregate_states` is neither defined nor imported in the visible notebook,
# so this cell raised NameError on a fresh kernel; `aggregate_region` is the
# helper that sums all columns into the `region` column.
df_r = aggregate_region(pd.read_csv(fs['2020-04-22'], index_col='date', parse_dates=['date']))[region]
# A single .loc call selects rows and column together (no chained indexing).
pd.concat([df_r, df_aggr.loc[df_r.index, region]], axis=1)

Unnamed: 0_level_0,New York,New York
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-23,20459.156116,20222.0
2020-04-24,21289.359038,20769.0
2020-04-25,22237.349975,21335.0
2020-04-26,23046.048339,21813.0
2020-04-27,23899.449765,22275.0
2020-04-28,24803.033174,22784.0
2020-04-29,25766.097205,23294.0
2020-04-30,26571.43405,23623.0
2020-05-01,27464.859949,23847.0
2020-05-02,28498.96209,24041.0
