In [None]:
# standard libraries
import pickle5 as pickle
from datetime import timedelta, date
from functools import partial

# third party
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from delphi_nowcast.constants import Default
from delphi_nowcast.deconvolution import deconvolution

In [None]:
# Configuration for this demo: one state, one nowcast issue date.
geo = 'ny'
as_of = date(2021, 2, 8)
ground_truth_as_of = date(2020, 8, 28)  # NOTE(review): not referenced in the cells visible here
first_data_date = date(2020, 5, 1)


def load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a `with` block so the handle is always closed,
    including when unpickling raises.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# load convolved cases
convolved_truth_indicator = Default.GROUND_TRUTH_INDICATOR
convolved_response_prefix = f'../data/jhu-csse_confirmed_incidence_prop/{convolved_truth_indicator.source}_{convolved_truth_indicator.signal}'
all_convolved_cases = load_pickle(convolved_response_prefix + f'_{as_of}.p')
# NOTE(review): the lookup key hard-codes source/signal rather than reading them
# from `convolved_truth_indicator` -- confirm the two are meant to agree.
convolved_cases = all_convolved_cases[('jhu-csse', 'confirmed_incidence_prop', 'state', geo)]

# set date range: the last estimable date is `as_of` minus the indicator's lag (in days)
last_est_date = as_of - timedelta(convolved_truth_indicator.lag)
full_dates = [d.date() for d in pd.date_range(first_data_date, last_est_date)]

# load convolution delay distribution
delay_dist = load_pickle(f'../data/km_delay_distributions/delay_distribution_as_of_{as_of}.p')

# load nowcast estimates, keeping only the last 10 entries for this geo
simple_avg_raw = load_pickle(f'../data/fusion_sensors/fast_all_simple_average_{as_of}.p')
simple_avg = pd.DataFrame({
    'dates': pd.to_datetime(simple_avg_raw[geo].dates[-10:]),
    'Average': simple_avg_raw[geo].values[-10:]
})

# load final infection estimates
truth = load_pickle('../data/tf_ground_truths.p')
final_truth = truth[geo].values
final_full_dates = truth[geo].dates

In [None]:
# create naive estimate
# Inner fitting routine passed to the cross-validation wrapper.
naive_fit_func = partial(deconvolution.deconvolve_double_smooth_tf_fast, natural=False)

# Cross-validation wrapper with k=3 and a single-value grid (0.) for `gam_cv_grid`.
tf = partial(
    deconvolution.deconvolve_double_smooth_tf_cv,
    k=3,
    fit_func=naive_fit_func,
    gam_cv_grid=np.array([0.]),
)

# Convolved case series over the estimation window; 'locf' is passed through
# to `get_data_range` as its third argument.
convolved_cases_values = convolved_cases.get_data_range(first_data_date, last_est_date, 'locf')
n_obs = len(convolved_cases_values)

# x is the 1-based position of each observation in the series.
est = tf(
    y=np.array(convolved_cases_values),
    x=np.arange(1, n_obs + 1),
    kernel_dict=delay_dist,
    as_of_date=as_of,
)

In [None]:
# create output dataset
# Reported cases: one row per key of `convolved_cases.data`, with the key as `dates`.
cases = pd.DataFrame.from_dict(convolved_cases.data, orient='index').reset_index()
cases.columns = ['dates', 'cases']
cases['dates'] = pd.to_datetime(cases['dates'])

# The delay distribution is not supported on zero, so we can only
# produce good estimates up to t-1: drop the last date and the last estimate.
est_dates = pd.to_datetime(full_dates)[:-1]
infections = pd.DataFrame({'dates': est_dates, 'infections': est[:-1]})

final_infections = pd.DataFrame({
    'dates': pd.to_datetime(final_full_dates),
    'final_infections': final_truth
})

# Outer-join everything on `dates`, innermost pair first, so that no date
# present in any of the inputs is dropped.
nowcast_frame = pd.merge(infections, simple_avg, on='dates', how='outer')
cases_and_nowcast = pd.merge(cases, nowcast_frame, on='dates', how='outer')
out = pd.merge(final_infections, cases_and_nowcast, on='dates', how='outer')

out = out.sort_values('dates')
out['time'] = out['dates']  # duplicate of `dates` under a second column name
out.to_csv('./nowcast_demo_data.csv', index=False)

In [None]:
# write out delay distribution
sample_delay_date = date(2020, 11, 15)
sample_delay_path = f'../data/km_delay_distributions/delay_distribution_as_of_{sample_delay_date}.p'

# Open via a context manager so the file handle is closed even if unpickling fails.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(sample_delay_path, 'rb') as delay_file:
    sample_delay_dist = pickle.load(delay_file)

# Export the entry keyed by the sample date as a CSV without the index column.
pd.DataFrame(sample_delay_dist[sample_delay_date]).to_csv(f'./kernel_{sample_delay_date}.csv', index=False)