# Ensemble Challenge (18-month Evaluation)

Timepoint: July 15, 2021. 

Setting: New York State upon the arrival of the Delta variant. Vaccines available

In [None]:
# TODO: 
# - collect and process data
# - collect 3 candidate models
# - set parameter values and interventions
# - create observables for cumulative cases, hospitalizations, and deaths
# - calibrate models independently
# - calibrate ensemble of 3 models 
# - improve calibration with more data
# - plot and post-process results

### Load dependencies

In [9]:
import pandas as pd

import pyciemss
import pyciemss.visuals.plots as plots
import pyciemss.visuals.vega as vega
import pyciemss.visuals.trajectories as trajectories

# Data processing

In [11]:
location = "New York"
BETTER_NAMES = {'Cases': 'Infected',
                'Hospitalizations': 'Hospitalized',
                'Deaths': 'Dead'}
## instantiate, making sure no dates are skipped between start and end (could be found dynamically)
full_dataset = pd.DataFrame({'date':pd.date_range(start='1/22/2020', end='3/29/2024')})

## fill the instantiated dataset
for i in ['Cases','Hospitalizations','Deaths']:
    all_data = pd.read_csv(f'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Incident%20{i}.csv')
    subset   = all_data[all_data.location_name == location].groupby("date")["value"].sum().reset_index()
    subset.date = pd.to_datetime(subset.date)
    subset['cumsum'] = subset.value.cumsum()
    full_dataset = full_dataset.merge(subset[['date','cumsum']], how='outer', on='date').rename(columns={'cumsum':BETTER_NAMES[i]})

full_dataset[(full_dataset.date >= '2021-06-01') & (full_dataset.date < '2021-09-02')].reset_index(drop=True).reset_index(names=['Timestamp']).drop(['date'],axis=1)

  all_data = pd.read_csv(f'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Incident%20{i}.csv')
  all_data = pd.read_csv(f'https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-Incident%20{i}.csv')


Unnamed: 0,Timestamp,Infected,Hospitalized,Dead
0,0,2102869.0,136862.0,53123.0
1,1,2103269.0,136968.0,53132.0
2,2,2103768.0,137089.0,53151.0
3,3,2104539.0,137179.0,53207.0
4,4,2105375.0,137249.0,53221.0
...,...,...,...,...
88,88,2262893.0,148866.0,54159.0
89,89,2269038.0,149106.0,54199.0
90,90,2274680.0,149454.0,54230.0
91,91,2278590.0,149812.0,54254.0


In [12]:
def get_data_between(start='2021-06-01',end='2021-09-02'):
    return full_dataset[(full_dataset.date >= start) & (full_dataset.date < end)].reset_index(drop=True).reset_index(names=['Timestamp']).drop(['date'],axis=1)

dataset = get_data_between('2021-06-01', '2021-09-02')
# dataset

Unnamed: 0,Timestamp,Infected,Hospitalized,Dead
0,0,2102869.0,136862.0,53123.0
1,1,2103269.0,136968.0,53132.0
2,2,2103768.0,137089.0,53151.0
3,3,2104539.0,137179.0,53207.0
4,4,2105375.0,137249.0,53221.0
...,...,...,...,...
88,88,2262893.0,148866.0,54159.0
89,89,2269038.0,149106.0,54199.0
90,90,2274680.0,149454.0,54230.0
91,91,2278590.0,149812.0,54254.0


# Gather models and data

In [25]:
model1 = "SEIRHD_age_structured_petrinet.json"
model2 = "model"
model3 = "model"

dataset1 = get_data_between('2021-06-01', '2021-07-14')
dataset2 = get_data_between('2021-06-01', '2021-07-21')
dataset3 = get_data_between('2021-06-01', '2021-07-29')
dataset4 = get_data_between('2021-06-01', '2021-08-04')

# (1) Forecast 1: 07/15/2021 - 08/12/2021

## (A) Calibrate an ensemble of a single model

In [36]:
num_iterations = 1000
model_paths = [model1]
solution_mappings = [lambda x : x]
data_mapping = {'Infected': 'Cumulative_cases', 'Hospitalized': 'Cumulative_hosp', 'Dead': 'deceased'}
calibrated_results = pyciemss.ensemble_calibrate(model_paths, solution_mappings, dataset1, data_mapping=data_mapping, num_iterations=num_iterations)
parameter_estimates = calibrated_results["inferred_parameters"]
print(parameter_estimates())

  if not data_df.applymap(lambda x: isinstance(x, (int, float))).all().all():


Data printout: This dataset contains 42 rows of data. The first column, Timestamp, begins at 0 and ends at 42. The subsequent columns are named: Infected, Hospitalized, Dead
{'model_weights': tensor([1.], grad_fn=<ExpandBackward0>), 'model_0/persistent_beta': tensor(0.0513, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_EI': tensor(0.1005, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_IR_y': tensor(0.4825, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_IR_m': tensor(0.1992, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_IH_y': tensor(0.0237, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_IH_m': tensor(0.0979, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_IH_o': tensor(0.0385, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_HR_y': tensor(0.3061, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_HR_m': tensor(0.1536, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_HR_o': tensor(0.1011, grad_fn=<ExpandBackward0>), 'model_0/persistent_r_HD_y': tensor(0.0012, grad_fn

## Sample calibrated model and plot results

In [37]:
start_time = 0.0
end_time = 72.0
logging_step_size = 10.0
num_samples = 100

calibrated_ensemble_result = pyciemss.ensemble_sample(model_paths, solution_mappings, end_time, logging_step_size, num_samples, 
                                                      start_time=start_time, inferred_parameters=parameter_estimates)
display(calibrated_ensemble_result['data'].head())

# Plot the ensemble result for cumulative cases, hospitalizations, and deaths
nice_labels={"deceased_observable_state": "Deaths", 
                 "Cumumlative_hosp_state": "Cumul Hosp",
                 "Cumulative_cases_state": "Cumul Cases"
                }
schema = plots.trajectories(calibrated_ensemble_result["data"], 
                            keep=["deceased_observable_state", "Cumumlative_hosp_state", "Cumumlative_cases_state"], 
                            relabel=nice_labels,
                            # points=dataset1.drop(columns=['Timestamp']).reset_index(drop=True)
                          )
plots.save_schema(schema, "_schema.json")
plots.ipy_display(schema, dpi=150)

Unnamed: 0,timepoint_id,sample_id,timepoint_unknown,model_0/weight_param,model_0/persistent_beta_param,model_0/persistent_r_EI_param,model_0/persistent_r_IR_y_param,model_0/persistent_r_IR_m_param,model_0/persistent_r_IH_y_param,model_0/persistent_r_IH_m_param,...,R_y_state,S_m_state,S_o_state,S_y_state,susceptible_state,exposed_state,infected_state,recovered_state,hospitalized_state,deceased_state
0,0,0,10.0,1.0,0.050968,0.100441,0.49919,0.199377,0.028002,0.09864,...,427354.09375,10783815.0,3166580.75,3531517.0,17481912.0,21991.367188,4508.123535,2111840.75,1231.992065,53221.035156
1,1,0,20.0,1.0,0.050968,0.100441,0.49919,0.199377,0.028002,0.09864,...,433238.0625,10732120.0,3159071.0,3516987.0,17408178.0,58320.394531,11827.084961,2134108.5,3074.911377,53415.765625
2,2,0,30.0,1.0,0.050968,0.100441,0.49919,0.199377,0.028002,0.09864,...,448801.34375,10596472.0,3139539.0,3478791.0,17214802.0,152964.75,31046.246094,2192707.5,8084.44043,53908.726562
3,3,0,40.0,1.0,0.050968,0.100441,0.49919,0.199377,0.028002,0.09864,...,489295.5,10251973.0,3089436.25,3381435.0,16722844.0,389719.375,79694.257812,2345103.75,20937.236328,55189.238281
4,4,0,50.0,1.0,0.050968,0.100441,0.49919,0.199377,0.028002,0.09864,...,590109.9375,9442826.0,2967878.75,3150968.0,15561673.0,923806.125,192674.53125,2725635.25,51733.0625,58431.402344


ValueError: Vega to PNG conversion failed:
SVG has an invalid size

## (B) Calibrate an ensemble of multiple models

# (2) Forecast 2: 07/22/2021 - 08/19/2021

# (3) Forecast 3: 07/29/2021 - 08/26/2021

# (4) Forecast 4: 08/05/2021 - 09/02/2021