# Ensemble Challenge: Timepoint 2

Goal: to capture the complexity and nuances around the evolution of the pandemic at various stages and locations.

Location A: New York State

Timepoint 2: July 15, 2021. Setting: New York State upon the arrival of the Delta variant. Vaccines available.

### Load dependencies

In [1]:
import os
import pandas as pd
import numpy as np
from pyciemss.Ensemble.interfaces import (
    load_and_sample_petri_ensemble, load_and_calibrate_and_sample_ensemble_model
)
from pyciemss.PetriNetODE.interfaces import (
    load_and_sample_petri_model,
    load_and_calibrate_and_sample_petri_model,
    load_and_optimize_and_sample_petri_model,
    load_and_calibrate_and_optimize_and_sample_petri_model
)
from pyciemss.visuals import plots

## Get data

In [2]:
# Download the New York State dataset (columns used below: "timestep",
# "I", "H", "D") from the ASKEM experiments repository.
url = 'https://raw.githubusercontent.com/DARPA-ASKEM/experiments/main/thin-thread-examples/milestone_12month/evaluation/ensemble_eval_SA/datasets/aabb3684-a7ea-4f60-98f1-a8e673ad6df5/dataset.csv'
ny_data = pd.read_csv(url)

# Evaluation window used as plot overlay: rows 500-568 (69 rows, i.e. the
# 41 calibration rows followed by 28 forecast rows), re-indexed from 0 and
# without the "timestep" column.
# NOTE(review): the previous comment gave the range as
# "07/15/2021 - 05/01/2020", which runs backwards -- confirm the calendar dates.
test_data = ny_data.iloc[500:569].reset_index(drop=True).drop(columns="timestep")

# Calibration window: rows 500-540 (41 rows), re-indexed from 0. The original
# note says this is the historical data up to Timepoint 2 (07/14/2021).
# NOTE(review): that note also called this "the first 542 rows", but only
# 41 rows are kept -- confirm which window is intended.
ny_data = ny_data.iloc[500:541].reset_index(drop=True)

# Attach a float time axis 0.0, 1.0, ... and keep only the columns of interest.
ny_data1 = ny_data.assign(timepoints=np.arange(len(ny_data), dtype=float))
ny_data = ny_data1[["timepoints", "I", "H", "D"]]

# Persist the observations; the row index is written as the first CSV column
# and the "timepoints" column is left out.
# NOTE(review): the calibration cells in this notebook read
# "../../notebook/ensemble_eval_sa/NY_data2.csv", not this file -- confirm
# whether this output name/location is still the intended one.
observed_columns = ["I", "H", "D"]
ny_data[observed_columns].to_csv("NY_data1.csv")

## Set up timepoints

In [3]:
# Simulation grid: one float timepoint per integer step from 0 through
# stop_timepoint inclusive (70 values, 0.0 ... 69.0).
start_timepoint, stop_timepoint = 0, 41 + 28  # 41 data rows, then a four-week (28-step) forecast
timepoints = list(map(float, range(stop_timepoint + 1)))

## Select relevant models

In [8]:
# Paths to the candidate model files used in this notebook (models 2, 4, 5).
# Models 1 and 3 are currently disabled; their files would be
#   "../../notebook/ensemble_eval_sa/age_vacc_var_V1.json"              (model1_location)
#   "../../notebook/ensemble_eval_sa/age_vacc_var_reinfection_v1.json"  (model3_location)
model2_location, model4_location, model5_location = (
    "../../notebook/ensemble_eval_sa/age_vacc_var_v2.json",
    "../../notebook/ensemble_eval_sa/age_vacc_var_reinfection_v2.json",
    "../../notebook/ensemble_eval_sa/age_vacc_var_reinfection_v3.json",
)

## Load, calibrate and sample an ensemble of one model

In [5]:
# Draw a couple of prior samples from model 2 over the full simulation grid
# and plot the "*_sol" trajectories against the observed data.
num_samples = 2
prior_visual_options = {"title": "Prior Distributions", "subset": ".*_sol"}
prior_samples = load_and_sample_petri_model(
    model2_location,
    num_samples,
    timepoints=timepoints,
    method="euler",
    visual_options=prior_visual_options,
    time_unit="days",
)
# Uncomment to inspect the raw sampler output:
# display(prior_samples)

# Observed series, renamed with a "_data" suffix for the plot overlay.
observed_points = test_data.reset_index(drop=True).rename(
    columns={"I": "I_data", "H": "H_data", "D": "D_data"}
)
prior_frame = pd.DataFrame(prior_samples["data"])
schema = plots.pad(
    plots.trajectories(prior_frame, subset=".*_sol", points=observed_points),
    5,
)
plots.ipy_display(schema)




In [None]:
# Calibrate an "ensemble" consisting of model 2 alone, sample from it, save
# the samples and quantiles, and plot the "*_sol" trajectories against the
# observed data.
num_samples = 100
model_paths = [model2_location]
data_path = "../../notebook/ensemble_eval_sa/NY_data2.csv"
weights = [1]
# One mapping per model, of the form
#   {"column name in data": "observable or state variable in model"}.
# (An identity mapping {"I": "I", "H": "H", "D": "D"} was tried previously.)
solution_mappings = [{"I": "infected", "H": "hospitalized", "D": "dead"}]

# Keyword options for the calibration/sampling run.
calibration_options = {
    "verbose": True,
    "total_population": 19340000,
    "num_iterations": 200,
    "method": "euler",
    "time_unit": "days",
    "visual_options": {"title": "Calibrated Ensemble", "subset": ".*_sol"},
}
result = load_and_calibrate_and_sample_ensemble_model(
    model_paths,
    data_path,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    **calibration_options,
)

# Persist the sampled trajectories and their quantiles.
output_dir = "../../notebook/ensemble_eval_sa/"
result["data"].to_csv(output_dir + "partII_ensemble_of_one_results.csv", index=False)
result["quantiles"].to_csv(output_dir + "partII_ensemble_of_one_quantiles.csv", index=False)

# Overlay the observed series (renamed with a "_data" suffix) on the
# calibrated trajectories.
observed_points = test_data.reset_index(drop=True).rename(
    columns={"I": "I_data", "H": "H_data", "D": "D_data"}
)
schema = plots.pad(
    plots.trajectories(
        pd.DataFrame(result["data"]), subset=".*_sol", points=observed_points
    ),
    5,
)
plots.ipy_display(schema)

iteration 0: loss = 2723.8902804255486
iteration 25: loss = 2082.1973752379417
iteration 50: loss = 1375.9724009633064
iteration 75: loss = 1247.4361398816109


In [None]:
# Calibrate an "ensemble" consisting of model 4 alone, sample from it, and
# plot the "*_sol" trajectories against the observed data.
num_samples = 100
model_paths = [model4_location]
data_path = "../../notebook/ensemble_eval_sa/NY_data2.csv"
weights = [1]
# {"column name in data": "observable or state variable in model"}
solution_mappings = [{"I": "infected", "H": "hospitalized", "D": "dead"}]

# NOTE(review): only 26 iterations are requested here, versus 200 in the
# model-2 and multi-model cells -- confirm this is intentional.
calibration_options = {
    "verbose": True,
    "total_population": 19340000,
    "num_iterations": 26,
    "method": "euler",
    "time_unit": "days",
    "visual_options": {"title": "Calibrated Ensemble", "subset": ".*_sol"},
}
result = load_and_calibrate_and_sample_ensemble_model(
    model_paths,
    data_path,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    **calibration_options,
)

# Results are not written to disk in this cell. The save calls were commented
# out; they targeted the same partII_ensemble_of_one_results.csv /
# partII_ensemble_of_one_quantiles.csv files as the model-2 run.

# Overlay the observed series (renamed with a "_data" suffix) on the
# calibrated trajectories.
observed_points = test_data.reset_index(drop=True).rename(
    columns={"I": "I_data", "H": "H_data", "D": "D_data"}
)
schema = plots.pad(
    plots.trajectories(
        pd.DataFrame(result["data"]), subset=".*_sol", points=observed_points
    ),
    5,
)
plots.ipy_display(schema)

In [None]:
# Calibrate an "ensemble" consisting of model 5 alone, sample from it, and
# plot the "*_sol" trajectories against the observed data.
num_samples = 100
model_paths = [model5_location]
data_path = "../../notebook/ensemble_eval_sa/NY_data2.csv"
weights = [1]
# {"column name in data": "observable or state variable in model"}
solution_mappings = [{"I": "infected", "H": "hospitalized", "D": "dead"}]

# NOTE(review): only 26 iterations are requested here, versus 200 in the
# model-2 and multi-model cells -- confirm this is intentional.
calibration_options = {
    "verbose": True,
    "total_population": 19340000,
    "num_iterations": 26,
    "method": "euler",
    "time_unit": "days",
    "visual_options": {"title": "Calibrated Ensemble", "subset": ".*_sol"},
}
result = load_and_calibrate_and_sample_ensemble_model(
    model_paths,
    data_path,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    **calibration_options,
)

# Results are not written to disk in this cell. The save calls were commented
# out; they targeted the same partII_ensemble_of_one_results.csv /
# partII_ensemble_of_one_quantiles.csv files as the model-2 run.

# Overlay the observed series (renamed with a "_data" suffix) on the
# calibrated trajectories.
observed_points = test_data.reset_index(drop=True).rename(
    columns={"I": "I_data", "H": "H_data", "D": "D_data"}
)
schema = plots.pad(
    plots.trajectories(
        pd.DataFrame(result["data"]), subset=".*_sol", points=observed_points
    ),
    5,
)
plots.ipy_display(schema)

## Load, calibrate and sample an ensemble of several models

In [None]:
# Calibrate an equally weighted ensemble of models 2, 4 and 5, sample from
# it, save the samples and quantiles, and plot the "*_sol" trajectories
# against the observed data.
num_samples = 2
model_paths = [model2_location, model4_location, model5_location]
data_path = "../../notebook/ensemble_eval_sa/NY_data2.csv"

# Uniform weights: each model gets 1 / (number of models).
weights = [1 / len(model_paths) for _ in model_paths]

# Every model uses the same data-column -> model-variable mapping; build one
# independent dict per model.
solution_mappings = [
    {"I": "infected", "H": "hospitalized", "D": "dead"} for _ in model_paths
]

# Keyword options for the calibration/sampling run. No `method` is passed in
# this cell, unlike the single-model cells, which pass method="euler".
calibration_options = {
    "verbose": True,
    "total_population": 19340000,
    "num_iterations": 200,
    "time_unit": "days",
    "visual_options": {"title": "Calibrated Ensemble", "subset": ".*_sol"},
}
result = load_and_calibrate_and_sample_ensemble_model(
    model_paths,
    data_path,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    **calibration_options,
)

# Persist the sampled trajectories and their quantiles.
output_dir = "../../notebook/ensemble_eval_sa/"
result["data"].to_csv(output_dir + "partII_ensemble_of_many_results.csv", index=False)
result["quantiles"].to_csv(output_dir + "partII_ensemble_of_many_quantiles.csv", index=False)

# Overlay the observed series (renamed with a "_data" suffix) on the
# calibrated trajectories.
observed_points = test_data.reset_index(drop=True).rename(
    columns={"I": "I_data", "H": "H_data", "D": "D_data"}
)
schema = plots.pad(
    plots.trajectories(
        pd.DataFrame(result["data"]), subset=".*_sol", points=observed_points
    ),
    5,
)
plots.ipy_display(schema)