# Plots of boundary regularization

In [None]:
# standard
import pickle
from collections import defaultdict
from datetime import date, timedelta
from functools import partial

# third party
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import cm

# first party
from config import Config

## Tapered effect

In [None]:
# Deconvolution fitter from the project config, pre-bound with verbose=False
# so the repeated fits in the plotting cells do not print progress output.
ds_ntf = partial(Config.deconv_fit_func, verbose=False)

# Indicator describing the convolved ground-truth signal; its `source`,
# `signal` and `lag` attributes are read when loading the data below.
convolved_truth_indicator = Config.ground_truth_indicator

In [None]:
# Plot the effect of the tapering parameter gamma on the deconvolved
# infection curve for one location, as of a single date.
as_of = date(2021, 2, 1)
loc = 'ny'
convolved_response_prefix = f'../data/jhu-csse_confirmed_incidence_prop/{convolved_truth_indicator.source}_{convolved_truth_indicator.signal}'
# Use a context manager so the pickle file handle is closed right after loading.
# NOTE: pickle must only be used on trusted, locally generated files.
with open(convolved_response_prefix + f'_{as_of}.p', 'rb') as pickle_file:
    convolved_ground_truth = pickle.load(pickle_file)
# Pick the first entry whose geo_value matches the requested location.
data = [val for val in convolved_ground_truth.values() if val.geo_value == loc][0]
# Window: four maximum-delay lengths before `as_of`, up to `as_of` minus the
# indicator's lag.
start_date = as_of - timedelta(int(4 * Config.max_delay_days))
end_date = as_of - timedelta(convolved_truth_indicator.lag)
full_dates = pd.date_range(start_date, end_date)
signal = np.array(data.get_data_range(start_date, end_date, 'locf'))

# Set parameters
lam = 100.
# 20 log-spaced gamma values, shifted by -1 so the grid starts at exactly 0.
gam_grid = np.logspace(0, 5, 20) - 1
# Sample two more colors than there are gammas, reverse the order and drop the
# first entry; the loop below then uses the first `gam_grid.size` colors.
cm_subsection = np.linspace(0, 1, gam_grid.size + 2)
colors = [cm.nipy_spectral(x) for x in cm_subsection][::-1][1:]

plt.figure(figsize=(12, 5))
# Shade the final `max_delay_days` days before `end_date`.
plt.axvspan(end_date - timedelta(Config.max_delay_days), end_date,
            alpha=0.1, color='tab:gray')
plt.scatter(full_dates, signal, label='Cases', s=4, color='darkgray')
for i, gam in enumerate(gam_grid):
    # Fit with a single (lam, gam) pair so no grid search over values happens.
    est_infections = ds_ntf(
        y=signal, x=np.arange(1, len(signal) + 1),
        kernel_dict=Config.get_delay_distribution(as_of),
        as_of_date=as_of,
        lam_cv_grid=np.array([lam]),
        gam_cv_grid=np.array([gam]))

    # Only up to t-2, since the convolution reporting delay is not supported on 0.
    # (The slice drops the final estimate.)
    # Raw f-string: '\g' is not a valid escape sequence in a normal string.
    plt.plot(full_dates[:-1], est_infections[:-1], label=rf'$\gamma$={gam:.1f}', color=colors[i])

handles, labels = plt.gca().get_legend_handles_labels()
plt.xlim(left=end_date - timedelta(2.5 * Config.max_delay_days), right=as_of)
# Thin the legend: every third entry of all but the last three, plus the last
# three entries in full.
plt.legend(handles[:-3][::3] + handles[-3:], labels[:-3][::3] + labels[-3:], loc='upper left', fontsize='x-small')
plt.ylabel('Cases/infections per 100,000 people')
plt.xlabel('Date')
plt.title("Regularized deconvolution in New York")
plt.tight_layout()
plt.savefig("./figures/effect_tapered_ntf.pdf")
plt.show()

## Comparison of all trend filtering (TF) methods

In [None]:
def to_dataframe(a_dict):
    """Stack the entries of a mapping into one long DataFrame.

    Each value is expected to expose `data`, `values`, `geo_value` and
    `dates` attributes. Entries whose `data` is None are skipped; the
    mapping's keys are ignored.

    Returns a DataFrame with a single column 'x' (taken from `values`),
    indexed by ('loc', 'dates'). `pd.concat` raises ValueError when no
    entry has data.
    """
    frames = [
        pd.DataFrame({'x': series.values, 'loc': series.geo_value, 'dates': series.dates})
        for _key, series in a_dict.items()
        if series.data is not None
    ]
    return pd.concat(frames).set_index(['loc', 'dates'])

# Inputs for comparing the trend filtering variants at a single location.
location = 'ny'
as_of_date_range = Config.as_of_range
storage_dir = './results/'
# One results directory per entry of `methods` (paired by position).
# NOTE(review): 'natural_constraints' is listed twice; this looks intentional,
# since the loading loop reads both the 'tf' and 'ntf' results for the first
# two methods -- confirm.
storage_dirs = [storage_dir + x for x in [
    'natural_constraints',
    'natural_constraints',
    'tapered_smoothing']]
methods = ['Trend filtering', 'Natural trend filtering', 'Natural trend filtering (tapered)']

# Reported cases from the 2021-06-01 snapshot, restricted to `location`.
# Context managers close the pickle files as soon as they are loaded.
# NOTE: pickle must only be used on trusted, locally generated files.
convolved_response_prefix = '../data/jhu-csse_confirmed_incidence_prop/jhu-csse_confirmed_incidence_prop'
with open(convolved_response_prefix + '_2021-06-01.p', 'rb') as pickle_file:
    convolved_ground_truth = pickle.load(pickle_file)
convolved_ground_truth = convolved_ground_truth[('jhu-csse', 'confirmed_incidence_prop', 'state', location)]

# Ground-truth curve for `location`, flattened back to ordinary columns.
with open('../data/tf_ground_truths.p', 'rb') as pickle_file:
    truth = to_dataframe(pickle.load(pickle_file)).loc[location].reset_index()

In [None]:
# Collect, per method, one DataFrame of predictions at `location` for every
# as-of date. `output` maps method name -> list of DataFrames, each carrying
# an extra 'as_of' column.
result_keys = {
    'Trend filtering': 'tf',
    'Natural trend filtering': 'ntf',
    'Natural trend filtering (tapered)': 'ntf_tapered',
}

output = defaultdict(list)
for as_of in as_of_date_range:
    print(as_of)
    for method, path in zip(methods, storage_dirs):
        result_key = result_keys.get(method)
        if result_key is None:
            print('Unknown method')
            break

        # Context manager closes the results file right after loading.
        # NOTE: pickle must only be used on trusted, locally generated files.
        with open(f'{path}/as_of_{as_of}.p', 'rb') as pickle_file:
            result = pickle.load(pickle_file)
        predictions = to_dataframe(result[result_key])

        # Only the location lookup is expected to fail (no predictions for
        # this location on this date); report it and move on.
        try:
            loc_preds = predictions.loc[location]
        except KeyError as e:
            print(e)
            continue
        # `assign` returns a new frame, so no column is written onto a slice
        # of `predictions`.
        loc_preds = loc_preds.assign(as_of=as_of)
        output[method].append(loc_preds)

In [None]:
# One figure per method: reported cases, the finalized infection curve, and
# the estimates produced at the first 12 as-of dates.
fignames = ['tf', 'ntf', 'tapered_ntf']
# Pair each figure name with its method explicitly (same order as `methods`)
# rather than relying on the insertion order of `output`, which shifts when a
# method has no data for the first as-of date.
for method, name in zip(methods, fignames):
    method_data = output.get(method)
    if not method_data:
        # Nothing was loaded for this method; there is nothing to plot.
        continue

    all_data = pd.concat(method_data).reset_index()
    all_as_of = sorted(all_data.as_of.unique())[:12]
    # Two more color samples than curves, in reversed order.
    cm_subsection = np.linspace(0, 1, len(all_as_of) + 2)
    colors = [cm.nipy_spectral(x) for x in cm_subsection][::-1]

    plt.figure(figsize=(5, 5))
    plt.scatter(convolved_ground_truth.dates, convolved_ground_truth.values,
                color='tab:gray', label='Cases', alpha=0.5, s=10)
    plt.plot(truth.dates, truth.x, color='black', ls='--', label='Finalized infections')
    for i, as_of in enumerate(all_as_of):
        sub_df = all_data[all_data.as_of.eq(as_of)]
        plt.plot(sub_df.dates, sub_df.x, color=colors[i], alpha=0.5)

    plt.legend()
    plt.title(method)
    plt.ylabel('Cases/infections per 100,000 people')
    plt.xlim(date(2020, 12, 15), date(2021, 6, 1))
    plt.ylim(0, 110)
    plt.tight_layout()
    plt.savefig(f'./figures/ny_{name}.pdf')
    plt.show()