In [1]:
%load_ext autoreload
%autoreload 2

# standard imports
import numpy as np
import os
import pandas as pd
import pickle

from datetime import timedelta

from bokeh.io import export_png, export_svgs, output_notebook
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, ColumnDataSource
output_notebook()

# lib
import sys
sys.path.append('../')
from metrics import compute_metrics, _compute_metrics
from analysis import load_backfill, plot_accuracy, plot_cases, plot_prediction_interval, plot_metric_for_dates

### Progression in Regions

In [2]:
# Load the ground-truth counts. The CSV is indexed by region, so after the
# transpose each row is a date and each column is a region.
df_regions = pd.read_csv('../data/italy/data-upc.csv', index_col='region').transpose()
# Name the index 'date' first, then parse the date labels into timestamps.
df_regions.index = pd.to_datetime(df_regions.index.rename('date'))
print('Days = {}, Regions = {}'.format(*df_regions.shape))

# plot confirmed cases over time
p = plot_cases(df_regions, 'Confirmed cases per region in Italy', backend='svg')
show(p)

Days = 83, Regions = 106


### Load Backfill and Configs

In [36]:
# cases
# Backfill job to analyse. Only this assignment is effective; job ids that were
# assigned or commented out here before are kept for reference:
#   it/2020_05_24_22_29, it/2020_05_24_22_47, it/2020_05_25_08_04,
#   it/2020_05_25_14_07, it/2020_05_25_14_46, it/2020_05_25_17_25,
#   it/2020_05_25_18_50, it/2020_05_25_19_31, it/2020_05_25_19_38,
#   it/2020_05_26_21_48, it/2020_05_27_05_26
job = "it/2020_05_29_07_17"
fs, cfgs = load_backfill(job)

# Display the per-date configs without the 'fdat', 'fpop' and 'job' columns.
# `drop` returns a new frame, so `cfgs` keeps its 'job' column, which the
# prediction-interval cell reads via cfgs.loc[date]['job'].
cfgs.drop(columns=['fdat', 'fpop', 'job'])

Unnamed: 0_level_0,decay,loss,lr,momentum,niters,t0,test_on,weight_decay,window
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-04-22,latent2_1,nb,0.001,0.9,30000,5,21,0.2,10
2020-04-29,latent2_1,nb,0.001,0.9,30000,5,21,0.2,20
2020-05-05,latent2_1,nb,0.001,0.9,30000,5,21,0.2,20
2020-04-19,latent2_1,nb,0.001,0.9,30000,5,21,0.2,20
2020-04-26,latent2_1,nb,0.001,0.9,30000,5,21,0.2,15
2020-05-01,latent2_1,nb,0.001,0.9,30000,5,21,0.2,15


In [41]:
# Keyed by coverage level. The values look like the two-sided standard-normal
# z-scores for those levels (presumably) — not referenced elsewhere in this cell.
pi_multipliers = {0.99: 2.58, 0.95: 1.96, 0.80: 1.28}

# Accumulators filled by the per-date loop in this cell: one figure per
# backfill date, and (date, days, acc) records for the accuracy plot.
ps, accs = [], []

# Labels of the lower / upper bound of the prediction interval to evaluate.
plevel = (0.01, 0.99)

def select_piv(df, interval):
    """Return the rows of `df` whose 'piv' value equals `interval`.

    The 'piv' column itself is removed from the result and the 'date'
    column becomes the index.
    """
    is_selected = df["piv"] == interval
    without_piv = df.drop(columns=['piv'])
    return without_piv[is_selected].set_index('date')

for date in cfgs.index:
    # The interval forecasts for this backfill date are read relative to its job directory.
    jobdir = cfgs.loc[date]['job']
    df_piv = pd.read_csv(f'{jobdir}/../forecasts/piv_best_mae.csv', parse_dates=['date'])
    mean, lower, upper = (
        select_piv(df_piv, label)
        for label in ("mean", str(plevel[0]), str(plevel[1]))
    )

    # Keep only the days present in both the mean forecast and the ground truth.
    ix = np.intersect1d(mean.index, df_regions.index)
    df_gt, mean, lower, upper = (
        frame.loc[ix] for frame in (df_regions, mean, lower, upper)
    )

    p = figure(
        title=f"Prediction Intervals {date[-5:]}, {len(mean)} days",
        plot_width=300,
        plot_height=300,
        tools='save,hover',
        x_axis_label="Forecast days",
        y_axis_label="Cases",
        tooltips=[("Name", "$name"), ("Value", "$y")],
    )
    regions = df_regions.columns
    for region in regions:
        p = plot_prediction_interval(mean, lower, upper, df_gt, region, p)
    ps.append(p)

    # Coverage at 7, 14 and 21 days: the share of regions whose observed value
    # lies strictly between the lower and upper bound. Horizons that fall
    # outside the available ground truth are skipped.
    for d in (6, 13, 20):
        if d < len(df_gt):
            observed = df_gt.iloc[d]
            z = (observed < upper.iloc[d]) & (observed > lower.iloc[d])
            acc = sum(z) / len(z)
            accs.append((date, d + 1, acc))

# plot accuracies
# `accs` is rebound here: from the list of (date, days, acc) tuples collected
# by the loop to a DataFrame with those three columns.
accs = pd.DataFrame(accs, columns=['date', 'days', 'acc'])
# NOTE(review): the last argument is a one-element set holding a single
# forecast date; how plot_accuracy uses it is not visible here — confirm
# against the `analysis` module.
p = plot_accuracy(accs, plevel, 'Confirmed Cases, Italy', {'2020-04-19'})
show(p)

# plot prediction intervals
# (previous layout with all figures, kept for reference)
# grid = gridplot(ps, ncols=2, plot_width=430)
# Only three of the per-date figures are shown, side by side. With the config
# order displayed by the load cell this presumably picks 04-22, 04-26 and
# 04-29, i.e. chronological order — TODO confirm whenever `job` changes.
grid = gridplot([ps[0], ps[-2], ps[1]], ncols=3)
show(grid)
_ = export_svgs(grid, filename='/tmp/piv_ita.svg')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mets["date"] = pd.to_datetime(mets["date"])


### IHME

Compare our forecasts to the forecasts published by [IHME](https://covid19.healthdata.org/) and by [Los Alamos National Laboratory](https://covid-19.bsvgateway.org/).

In [26]:
# Forecast dates passed to the external-model comparison plots.
# The 2020-05-12 entry is currently disabled.
dates_los_alamos = [
    '2020-04-22', '2020-04-29', '2020-05-05',
    # '2020-05-12',
    '2020-04-19', '2020-04-26', '2020-05-01',
]
# MAE comparison against IHME: the figures returned by plot_metric_for_dates
# are laid out two per row, shown, and exported as SVG.
# NOTE(review): the date list is named `dates_los_alamos` but is used for the
# IHME comparison here — confirm the same forecast dates are intended.
ps = plot_metric_for_dates(fs, df_regions, dates_los_alamos, 'IHME', 'IHME', 'MAE', None)
grid = gridplot(ps, ncols=2, plot_width=430)
show(grid)
_ = export_svgs(grid, filename='/tmp/mae_ita.svg')

In [31]:
# Same MAE comparison against the Los Alamos forecasts.
# NOTE(review): the stored output of this cell is `KeyError: 'CampoBasso'`, so
# it did not run to completion — presumably a region-name mismatch between the
# Los Alamos data and `df_regions`; verify inside plot_metric_for_dates.
ps = plot_metric_for_dates(fs, df_regions, dates_los_alamos, 'Los Alamos', 'los_alamos', 'MAE', '')
grid = gridplot(ps, ncols=2, plot_width=430)
show(grid)

KeyError: 'CampoBasso'

### Northeastern GLEAM

In [556]:
# Forecast dates for the GLEAM comparison; all but 2020-04-27 are currently disabled.
dates_gleam = [
    #'2020-04-12',
    #'2020-04-19', 
    '2020-04-27',
    #'2020-05-05',
]
# NOTE(review): this call passes five positional arguments and no ground-truth
# frame, and treats the result as a single figure. The IHME / Los Alamos cells
# call plot_metric_for_dates with seven arguments (fs, df_regions, dates, ...)
# and pass the result to gridplot as a list. This cell looks like it targets an
# older signature — confirm before re-running.
p = plot_metric_for_dates(fs, dates_gleam, 'GLEAM', '../data/gleam/predictions_{}.csv', 'MAE')
show(p)
_ = export_png(p, filename='/tmp/gleam.png')

2020-04-28     21.156863
2020-04-29     43.372549
2020-04-30     61.980392
2020-05-01     80.078431
2020-05-02     95.921569
2020-05-03    106.549020
2020-05-04    113.156863
2020-05-05    136.843137
2020-05-06    145.705882
2020-05-07    167.647059
2020-05-08    186.156863
Name: MAE_NAIVE, dtype: float64
2020-04-28     14.625000
2020-04-29     28.500000
2020-04-30     38.062500
2020-05-01     52.395833
2020-05-02     65.833333
2020-05-03     76.791667
2020-05-04     85.645833
2020-05-05    100.583333
2020-05-06    103.770833
2020-05-07    120.291667
2020-05-08    137.416667
Name: MAE_NAIVE, dtype: float64


### MIT

In [573]:
# Forecast dates for the MIT comparison; 2020-05-03 is currently disabled.
dates_mit = [
    '2020-04-23',
    '2020-04-28', 
    '2020-05-01',
    #'2020-05-03',
]
# NOTE(review): this call passes five positional arguments and no ground-truth
# frame, and treats the result as a single figure. The IHME / Los Alamos cells
# call plot_metric_for_dates with seven arguments (fs, df_regions, dates, ...)
# and pass the result to gridplot as a list. This cell looks like it targets an
# older signature — confirm before re-running.
p = plot_metric_for_dates(fs, dates_mit, 'MIT', '../data/mit/predictions_{}.csv', 'MAE')
show(p)
_ = export_png(p, filename='/tmp/mit.png')

2020-04-24    118.784314
2020-04-25    112.313725
2020-04-26     96.235294
2020-04-27     91.627451
2020-04-28     91.098039
2020-04-29     98.333333
2020-04-30    100.725490
2020-05-01     95.137255
2020-05-02     87.901961
2020-05-03     79.941176
2020-05-04     66.568627
2020-05-05     73.960784
2020-05-06     85.764706
2020-05-07     92.843137
2020-05-08    108.098039
Name: MAE, dtype: float64
2020-04-29    103.686275
2020-04-30    105.607843
2020-05-01    101.784314
2020-05-02     93.058824
2020-05-03     88.901961
2020-05-04     87.686275
2020-05-05     90.019608
2020-05-06    103.568627
2020-05-07    105.980392
2020-05-08    109.960784
Name: MAE, dtype: float64
2020-05-02    110.764706
2020-05-03    107.019608
2020-05-04    100.862745
2020-05-05    108.450980
2020-05-06    126.392157
2020-05-07    130.941176
2020-05-08    133.294118
Name: MAE, dtype: float64
