# Defining ground truth

In [None]:
# standard
import pickle
from datetime import timedelta
from functools import partial

# third party
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# first party
from config import Config
from data_containers import LocationSeries
from deconvolution import deconvolution

In [None]:
# Date the ground truth is taken "as of", read from the project config.
as_of = Config.ground_truth_date

# NOTE: unpickling can execute arbitrary code; these files must come from a
# trusted source (they are expected to be produced by this project).
kernel_file = "../data/naive_delay_distributions/uncensored_delay_distribution.p"
# Use a context manager so the file handle is closed as soon as loading ends.
with open(kernel_file, "rb") as kernel_fh:
    kernel_dict = pickle.load(kernel_fh)

# The convolved (reported) series is stored per indicator source/signal and
# per as-of date; the path below is assembled from those pieces.
convolved_truth_indicator = Config.ground_truth_indicator
convolved_response_prefix = f'../data/jhu-csse_confirmed_incidence_prop/{convolved_truth_indicator.source}_{convolved_truth_indicator.signal}'
with open(convolved_response_prefix + f'_{as_of}.p', 'rb') as truth_fh:
    convolved_ground_truth = pickle.load(truth_fh)

In [None]:
# Pre-bind every fixed setting of deconvolution.deconvolve_tf_cv so that later
# calls only need to pass the per-location arguments.
tf_settings = dict(
    k=3,
    # Inner fitting routine, itself pre-configured with natural=False.
    fit_func=partial(deconvolution.deconvolve_tf, natural=False),
    # Ten log-spaced values between 10**1 and 10**3.5, followed by three
    # larger hand-picked values.
    lam_cv_grid=np.concatenate([np.logspace(1, 3.5, 10), [5000, 8000, 15000]]),
    # Single-element grid containing only 0.0.
    gam_cv_grid=np.zeros(1),
    verbose=False,
)
tf = partial(deconvolution.deconvolve_tf_cv, **tf_settings)


# Date window for the ground truth: from the configured first data date up to
# the as-of date minus the indicator's lag (timedelta's first argument is days).
start_date = Config.first_data_date
end_date = as_of - timedelta(convolved_truth_indicator.lag)
full_dates = pd.date_range(start_date, end_date)

# Maps geo_value -> LocationSeries holding the deconvolved estimates.
ground_truths = {}
# Only the stored series objects are needed; the dictionary keys are unused.
for data in convolved_ground_truth.values():
    print(data.geo_value, data.geo_type)
    # NOTE(review): 'locf' presumably fills gaps by carrying the last
    # observation forward -- confirm against get_data_range.
    signal = data.get_data_range(start_date, end_date, 'locf')
    out = tf(
        y=np.array(signal),
        x=np.arange(1, len(signal) + 1),
        kernel_dict=kernel_dict,
        as_of_date=as_of,
    )

    # We only store estimates up till t-2 (the final date of the window is
    # dropped via [:-1]), because the convolutional reporting distribution
    # is not supported on 0.
    ground_truths[data.geo_value] = LocationSeries(
        data.geo_value, data.geo_type, dict(zip(full_dates[:-1], out[:-1])))

# Persist all series in a single pickle for later use.
with open('../data/tf_ground_truths.p', 'wb') as f:
    pickle.dump(ground_truths, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Number of locations for which a ground truth series was stored.
len(ground_truths)

In [None]:
# Visual sanity check for the 'ny' state series: the reported values as a
# scatter, with the stored deconvolved estimate drawn as a dashed line.
plt.figure(figsize=(12, 5))

ny_key = ('jhu-csse', 'confirmed_incidence_prop', 'state', 'ny')
ny_cases = convolved_ground_truth[ny_key]
plt.scatter(ny_cases.dates, ny_cases.values, color='gray', s=0.3, label='cases')

ny_tf = ground_truths['ny']
plt.plot(ny_tf.dates, ny_tf.values, ls='--', label='tf')

plt.legend()
plt.show()