In [1]:
%load_ext autoreload
%autoreload 2

# standard imports
import importlib
import numpy as np
import os
import pandas as pd
import torch as th
import pickle
import yaml

from argparse import Namespace
from datetime import timedelta
from pathlib import Path

from bokeh.io import output_notebook, export_svgs
from bokeh.layouts import gridplot
from bokeh.palettes import Blues3, Blues9
from bokeh.models import Span, LinearColorMapper
from bokeh.plotting import figure, ColumnDataSource
output_notebook()

# lib
import sys
sys.path.append('../')
from analysis import show

In [2]:
def load_model(jobdir, model='ar', prefix=''):
    """Rebuild a trained model from the files stored in a job directory.

    Imports the module called ``model``, reads ``{jobdir}/{model}.yml``, displays
    the ``train`` section of that config as a Key/Value table, initialises the
    module's ``CV_CLS`` with those settings and loads the weights saved in
    ``{jobdir}/{prefix}{model}_model.bin``.

    Args:
        jobdir: Directory containing the job's YAML config and checkpoint.
        model: Name of the importable model module; also the stem of the
            config and checkpoint file names.
        prefix: String prepended to the checkpoint file name.

    Returns:
        A tuple ``(func, new_cases)``: ``modelcv.func`` with the checkpoint
        loaded, and the first value returned by ``modelcv.initialize``.
    """
    mod = importlib.import_module(f'{model}')
    # Use a context manager so the config file handle is closed right away
    # instead of being left for the garbage collector.
    with open(f'{jobdir}/{model}.yml') as cfg_file:
        modcfg = yaml.load(cfg_file, Loader=yaml.FullLoader)
    train_cfg = modcfg['train']
    display(pd.DataFrame(train_cfg.items(), columns=['Key', 'Value']))
    modelcv = mod.CV_CLS()
    # initialize() returns four values; only the first one is handed back.
    new_cases, _regions, _, _ = modelcv.initialize(Namespace(**train_cfg))
    # NOTE(review): th.load unpickles the file -- only load trusted checkpoints.
    # map_location='cpu' lets a checkpoint written on a GPU be read on any host;
    # load_state_dict then copies the values into the model's own parameters.
    state = th.load(f'{jobdir}/{prefix}{model}_model.bin', map_location='cpu')
    modelcv.func.load_state_dict(state)
    return modelcv.func, new_cases

In [3]:
# Sweeps inspected earlier, kept for reference only. They used to be live
# assignments that were each overwritten by the following line.
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_19_14_04"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_20_04_59"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_20_06_03"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_20_06_47"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_20_12_22"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_20_08_19"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/us/2020_06_26_17_11"
#sweepdir = "/checkpoint/maxn/covid19/forecasts/nystate/2020_07_03_17_50_02/sweep_2020-04-22"

# Sweep directory and model module analysed by default.
sweepdir = "/checkpoint/maxn/covid19/forecasts/nystate/2020_07_13_12_27_43"
model_name = 'bar'

# Environment override: when CV_BASE_DIR is set, it replaces the sweep directory
# and CV_MODULE (which must then be set as well) replaces the model name.
if 'CV_BASE_DIR' in os.environ:
    sweepdir = os.environ['CV_BASE_DIR']
    model_name = os.environ['CV_MODULE']
    # NOTE(review): presumably read by analysis.show to pick static output -- confirm.
    os.environ['BOKEH_STATIC'] = '1'
    print(sweepdir)

# Build one row per job found under `sweepdir`: the job's training config plus
# the value of `metric` taken from its metrics.csv.
rows = []
metric = 'MAE'
for path in Path(sweepdir).rglob("metrics.csv"):
    dirname = os.path.dirname(path)
    # The job's config sits next to metrics.csv; the context manager closes the
    # file instead of leaking one handle per job.
    with open(dirname + f"/{model_name}.yml") as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.FullLoader)["train"]
    # Read the file rglob() actually matched rather than re-building its path.
    mets = pd.read_csv(path, index_col='Measure')
    # Row `metric` of the table, last column.
    cfg[metric] = mets.loc[metric].iloc[-1]
    rows.append(cfg)
df = pd.DataFrame(rows)

In [4]:
from bokeh.transform import jitter
cols = ['granger', 'window', 'eta', 'weight_decay', 'niters', 'temporal']
#cols = ['window', 'weight_decay', 'niters']

# One narrow scatter panel per hyper-parameter: the parameter's values on a
# categorical x-axis, the metric on the y-axis.
ps = []
for col in cols:
    # Stringify the column (in place) so its values can act as categorical factors.
    df[col] = df[col].apply(str)
    cats = np.unique(df[col].values)
    # Only rows whose metric is strictly below the median are plotted.
    source = df.loc[df[metric] < df[metric].quantile(.5), [col, metric]]
    p = figure(title=col, x_range=cats, y_axis_label=metric, width=150, height=250)
    p.circle(x=col, y=metric, source=source, size=5, alpha=0.4)
    p.xaxis.major_label_orientation = np.pi/4
    p.xgrid.grid_line_color = None
    ps.append(p)
grid = gridplot(ps, ncols=6)
show(grid, 'img/params.png')

In [5]:
# Order the jobs by the metric collected in the sweep table (ascending, so row 0
# holds the smallest value). Sorting by `metric` rather than a hard-coded 'MAE'
# keeps this cell working when the metric name is changed where it is defined.
df = df.sort_values(by=metric)
ix = 0
# The job directory is the parent directory of the config's `fdat` entry.
jobdir = os.path.dirname(df.iloc[ix].fdat)
# The 'final_model_' checkpoint prefix is only selected for ix == -1.
prefix = 'final_model_' if ix == -1 else ''
model, ys = load_model(jobdir, model_name, prefix=prefix)
# 1-based index along dimension 1 of `ys`.
x = np.arange(ys.size(1)) + 1
betas, nus = model.beta(th.from_numpy(x).cuda(), ys.cuda())
betas = betas.detach().cpu().numpy()

Unnamed: 0,Key,Value
0,activation,sigmoid
1,decay,wave1_7_2
2,eta,0.1
3,fdat,/checkpoint/maxn/covid19/forecasts/nystate/202...
4,fpop,/private/home/maxn/covid19_spread/data/populat...
5,granger,0.5
6,loss,nb
7,lr,0.001
8,momentum,0.99
9,niters,50000


Number of Regions = 62
Timeseries length = 139
Max increase = 2663.0
Loading /private/home/maxn/covid19_spread/data/usa/testing/features.pt
Loading /private/home/maxn/covid19_spread/data/usa/fb/mobility_features.pt
Hamilton, New York
Loading /private/home/maxn/covid19_spread/data/usa/google/mobility_features.pt
Hamilton, New York
Feature size = 139 x 62 x 9


### Beta

In [9]:
regions = model.regions
#regions = ['New York City, New York', 'Bergen, New Jersey', 'Marion, Ohio'] + model.regions[
#    np.random.permutation(len(model.regions))
#][:10].tolist()

# `height`/`width` replace `plot_height`/`plot_width`: they are what every other
# figure in this notebook uses, and the `plot_*` spellings were removed in Bokeh 3.0.
p = figure(
    height=300,
    width=400,
    title='Beta Evolution',
    tools="save",
    x_axis_label="Day",
    y_axis_label="Beta",
    #y_axis_type="log",
    tooltips=[("Region","$name"), ("Beta", "$y")]
)
# One line per region; `name` carries the region so the "$name" tooltip can show it.
for region in regions:
    # Position(s) of this region in model.regions, used to pick its row(s) of `betas`.
    ix = np.where(model.regions == region)[0]
    p.line(x=x, y=betas[ix].flatten(), line_width=1, color="#009ed7", name=region, alpha=0.5)
p.output_backend = 'svg'
show(p, 'img/betas.png')

### Granger Coefficients

In [10]:
# Pull the model's metapopulation weights out as a NumPy array and render them
# as an M x M image, where M is the number of regions.
alphas = model.metapopulation_weights().detach().cpu().numpy()
M = len(model.regions)
p = figure(
    tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")],
    title='Adjacency Matrix',
)
# No padding on either axis, so the image fills the whole plot area.
p.x_range.range_padding = 0
p.y_range.range_padding = 0
p.image(
    image=[alphas],
    x=0, y=0,
    dw=M, dh=M,
    # Blues9 in reversed order.
    palette=np.array(Blues9)[::-1],
    level="image",
)
show(p, 'img/adj.png')

In [12]:
# Density-normalised, 50-bin histogram over every entry of `alphas`,
# drawn as one rectangle per bin.
hist, edges = np.histogram(alphas.ravel(), bins=50, density=True)
p = figure(width=400, height=250, title='Weight distribution')
p.quad(
    left=edges[:-1],
    right=edges[1:],
    bottom=0,
    top=hist,
    line_color="white",
    color="#009ed7",
)
show(p, 'img/weight_dist.png')

In [13]:
# Flatten model.nu into a 1-D NumPy array and display its values in ascending order.
nu = model.nu.detach().cpu().numpy().flatten()
ix = nu.argsort()
nu[ix]

array([-12.522618  ,  -8.681534  ,  -6.0223856 ,  -4.794747  ,
        -4.68299   ,  -4.196314  ,  -4.011338  ,  -3.2205377 ,
        -3.2191117 ,  -3.0180273 ,  -2.786424  ,  -2.5106695 ,
        -2.4836447 ,  -2.2985976 ,  -1.8277038 ,  -1.4514631 ,
        -1.3081778 ,  -1.044273  ,  -0.68100584,  -0.5593376 ,
        -0.3371604 ,  -0.03840604,  -0.02083831,   0.15521066,
         0.2926568 ,   0.55586964,   0.7021354 ,   0.7718453 ,
         0.93046504,   1.1176454 ,   1.5438648 ,   1.6225386 ,
         1.8068553 ,   1.8915614 ,   2.13677   ,   2.388658  ,
         2.5716407 ,   2.6408536 ,   3.309848  ,   3.5561106 ,
         4.1927285 ,   4.1970387 ,   4.307631  ,   4.4104233 ,
         4.5250463 ,   5.0430346 ,   5.2569075 ,   5.6520185 ,
         5.688778  ,   6.2887397 ,   7.3758335 ,   7.4438624 ,
         7.4506497 ,   7.5853715 ,   7.9365706 ,   7.9487157 ,
         8.624101  ,   8.868639  ,   8.937091  ,   9.37163   ,
         9.473864  ,  10.304142  ], dtype=float32)

In [14]:
from bokeh.palettes import Blues5 as cmap
mus = [-1, 0, 1]
sigmas = [3, 2, 1]
# Evaluation grid on (0, 1) and its logit, built once for all panels. The grid
# has its own name so the global `x` (the day axis of the Beta plot) is not
# overwritten when this cell runs.
unit_x = np.arange(0.001,1,0.001)
logit_x = np.log(unit_x / (1 - unit_x))
# One panel per mu, one curve per sigma.
for mu in mus:
    p = figure(title=f'Logit-Normal (μ={mu})', width=300, height=200, y_range=(0, 5))
    for i, sigma in enumerate(sigmas):
        # Logit-normal density, assembled factor by factor:
        #   1/(sigma*sqrt(2*pi)) * 1/(x*(1-x)) * exp(-(logit(x) - mu)^2 / (2*sigma^2))
        y = 1 / (sigma * np.sqrt(2 * np.pi))
        y = y * 1 / (unit_x * (1 - unit_x))
        y = y * np.exp(-(logit_x - mu) ** 2 / (2 * sigma ** 2))
        p.line(x=unit_x, y=y, line_width=2, legend_label=f'σ={sigma}', color=cmap[i])
    # Only the mu == 0 panel shows the legend.
    if mu == 0:
        p.legend.location = 'top_center'
        p.legend.orientation = 'horizontal'
    else:
        p.legend.visible = False
    p.output_backend = 'svg'
    show(p)