# Shortening the deconvolution window

In [None]:
# standard
import pickle
import warnings
from datetime import timedelta
warnings.simplefilter(action='ignore', category=FutureWarning)

# third party
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# first party
from config import Config

## Read in data

The results are the output of `05_deconvolution_window.p` and should be stored in `./results/deconvolution_window/`, one pickle file per as-of date (`as_of_<date>.p`).

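The comparison uses tapered NTF with the naive kernel, varying the length of the training window used for deconvolution: $2d$, $4d$, and all past data, where $d$ is the maximum delay in days (`Config.max_delay_days`).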

In [None]:
def to_dataframe(a_dict):
    """Stack a mapping of per-location signals into one long DataFrame.

    Args:
        a_dict: Mapping whose values expose ``data``, ``values``,
            ``geo_value`` and ``dates`` attributes. The keys are ignored.
            Entries whose ``data`` is ``None`` are skipped.

    Returns:
        A DataFrame with a single column ``x`` (taken from ``values``),
        indexed by ``('loc', 'dates')`` (taken from ``geo_value`` and
        ``dates``). When no entry carries data, an empty frame with the
        same column and index names is returned.
    """
    frames = [
        pd.DataFrame({'x': val.values, 'loc': val.geo_value, 'dates': val.dates})
        for val in a_dict.values()
        if val.data is not None
    ]
    if not frames:
        # pd.concat raises ValueError on an empty list; fall back to an empty
        # frame with the expected schema so callers get a usable result.
        frames = [pd.DataFrame(columns=['x', 'loc', 'dates'])]
    return pd.concat(frames).set_index(['loc', 'dates'])

# Load the ground-truth signals and index them by (loc, dates).
# The context manager closes the file handle once the pickle has been read.
with open('../data/tf_ground_truths.p', 'rb') as truth_file:
    truth = to_dataframe(pickle.load(truth_file))

In [None]:
# As-of dates to evaluate, taken from the project configuration.
as_of_date_range = Config.every_10_as_of_range
# Directory that holds the pickled results, one `as_of_<date>.p` file per as-of date.
storage_dir = './results/deconvolution_window/'

In [None]:
# Length in days of each candidate training window, keyed by the label used
# in the analysis. 'all-past' is a ten-year span (presumably longer than the
# available history, so the start date gets clamped below -- verify).
training_options = {
    '2d': 2*Config.max_delay_days,
    '4d': 4*Config.max_delay_days,
    'all-past': 365*10
}

# One list of per-as_of error frames for every training option.
output = {option: [] for option in training_options}
for as_of in as_of_date_range:
    print(as_of)
    # Context manager so the handle is closed after every load instead of
    # leaking one open file per as-of date.
    with open(f'{storage_dir}/as_of_{as_of}.p', 'rb') as result_file:
        result = pickle.load(result_file)
    # Results are keyed by window start date; require one entry per option.
    if len(result) != len(training_options):
        print('Not all options ran on', as_of, 'skipping')
        continue
    for option, option_length in training_options.items():
        start_date = as_of - timedelta(option_length)
        if start_date not in result:
            # No run starts on the nominal date: clamp the start to the
            # first date for which data exists.
            start_date = max(start_date, Config.first_data_date)

        predictions = to_dataframe(result[start_date])
        # Signed error per (loc, dates); rows missing on either side drop out.
        errors = (truth - predictions).dropna().reset_index()
        errors['as_of'] = as_of
        output[option].append(errors)

In [None]:
# Build one long frame of errors across all training options, adding the
# option label, the absolute error, and the lag in days between the as-of
# date and the date being estimated.
analysis = []
for option in training_options:
    option_errors = pd.concat(output[option])
    analysis.append(
        option_errors.assign(
            training=option,
            abs_err=option_errors['x'].abs(),
            lag=(pd.to_datetime(option_errors['as_of']) - option_errors['dates']).dt.days,
        )
    )

analysis = pd.concat(analysis, ignore_index=True)

In [None]:
# Absolute error by lag for every training window, over all lags.
plt.figure(figsize=(10, 5))
ax = sns.lineplot(
    data=analysis,
    x='lag',
    y='abs_err',
    hue='training',
    style='training',
    markers=True,
    err_kws={'alpha': 0.1},
)
ax.set_title('Effect of varying deconvolution window')
ax.set_ylabel('Mean absolute error')
ax.set_xlabel('Days back from nowcast time')
plt.tight_layout()

In [None]:
# Restrict to lags of at most 10 days. The explicit .copy() makes plot_df an
# independent frame, so relabelling it neither warns about writing to a slice
# nor depends on pandas' view/copy semantics.
plot_df = analysis[analysis.lag.le(10)].copy()
# Relabel only the training column (not every column) for the legend.
plot_df['training'] = plot_df['training'].replace(
    {'2d': '$2d$', '4d': '$4d$', 'all-past': 'All past'}
)

In [None]:
# Square figure for the filtered frame (plot_df), saved as a PDF.
plt.figure(figsize=(5, 5))
ax = sns.lineplot(
    data=plot_df,
    x='lag',
    y='abs_err',
    hue='training',
    style='training',
    markers=True,
    err_kws={'alpha': 0.1},
)
ax.set_title('Training windows in deconvolution')
ax.set_ylabel('Mean absolute error')
ax.set_xlabel('Days back from nowcast time')
ax.set_xticks(range(2, 11, 2))
# title=None drops the automatic legend title.
ax.legend(title=None)
plt.tight_layout()
plt.savefig('./figures/deconvolution_window_05_small_square.pdf')