In [1]:
import os
from pyciemss.Ensemble.interfaces import (
    load_and_sample_petri_ensemble, load_and_calibrate_and_sample_ensemble_model
)
from pyciemss.visuals import plots

In [2]:
# Root folder of the integration demo; the cells below save their CSV results under it.
DEMO_PATH = "../../notebook/integration_demo/"

# AMR model definitions combined into the ensemble (local test fixtures).
# Previously used remote alternative for the first model:
# https://raw.githubusercontent.com/DARPA-ASKEM/Model-Representations/main/petrinet/examples/sir_typed.json
ASKENET_PATH_1 = "../../test/models/AMR_examples/ensemble/SEIARHDS_AMR.json"
ASKENET_PATH_2 = "../../test/models/AMR_examples/ensemble/SIRHD_AMR.json"
ASKENET_PATHS = [ASKENET_PATH_1, ASKENET_PATH_2]

## load_and_sample_petri_ensemble

In [3]:
# Ensemble settings: one weight per model, sample count, and the times (in days) to evaluate
weights = [0.5, 0.5]
num_samples = 100
timepoints = [float(day) for day in range(5)]

# One mapping per model, listed in the same order as ASKENET_PATHS
solution_mappings = [
    {"Infected": "Cases", "Hospitalizations": "hospitalized_population"},       # model 1 mappings
    {"Infected": "Infections", "Hospitalizations": "hospitalized_population"},  # model 2 mappings
]

# Sample from the (uncalibrated) ensemble
result = load_and_sample_petri_ensemble(
    ASKENET_PATHS,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    time_unit="days",
    visual_options={"subset": ".*_sol"},
)

ensemble_samples = result["data"]
q_ensemble = result["quantiles"]

# Persist the samples and their quantiles next to the demo notebook
for frame, csv_name in (
    (ensemble_samples, "results_petri_ensemble/sample_results.csv"),
    (q_ensemble, "results_petri_ensemble/quantile_results.csv"),
):
    frame.to_csv(os.path.join(DEMO_PATH, csv_name), index=False)

In [4]:
# Display the visual returned under the "visual" key of the latest `result`
plots.ipy_display(result["visual"])




## load_and_calibrate_and_sample_ensemble_model

In [5]:
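# # Generate calibration data from the sampled model output
# # (uncomment this cell to recreate results_petri_ensemble/ensemble_data.csv, which the calibration cell reads)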
# from pyciemss.utils.interface_utils import solutions_to_observations

# data = solutions_to_observations(timepoints, result["data"].set_index(["timepoint_id", "sample_id"]))
# data[0].to_csv(os.path.join(DEMO_PATH, "results_petri_ensemble/ensemble_data.csv"), index=False)

In [6]:
# Observations to calibrate against, plus the sampling grid (times in days) for the calibrated run
data_path = os.path.join(DEMO_PATH, "results_petri_ensemble/ensemble_data.csv")
num_samples = 2
timepoints = [float(day) for day in range(8)]

# Keyword options forwarded unchanged to the calibration call
calibration_options = dict(
    verbose=True,
    total_population=1000,
    num_iterations=10,
    time_unit="days",
    visual_options={"title": "Calibrated Ensemble", "subset": ".*_sol"},
)

# Calibrate the ensemble to the data, then sample from it
result = load_and_calibrate_and_sample_ensemble_model(
    ASKENET_PATHS,
    data_path,
    weights,
    solution_mappings,
    num_samples,
    timepoints,
    **calibration_options,
)

# Persist the calibrated samples and quantiles
for result_key, csv_name in (
    ("data", "results_petri_ensemble/calibrated_sample_results.csv"),
    ("quantiles", "results_petri_ensemble/calibrated_quantile_results.csv"),
):
    result[result_key].to_csv(os.path.join(DEMO_PATH, csv_name), index=False)

plots.ipy_display(result["visual"])

iteration 0: loss = 24.202240824699402



In [52]:
import pandas as pd
from typing import Dict, Optional, Iterable, Callable
def cdc_reformatcsv(
        filename: str,
        solution_string_mapping: Optional[Dict[str, str]] = None,
        forecast_start_date: Optional[str] = None,
        location: Optional[str] = None,
        drop_column_names: Optional[Iterable[str]] = None,
        ) -> pd.DataFrame:
    """
    Reformat a quantiles CSV file into the CDC ensemble forecast format.

    The input file must contain the columns "Forecast_Backcast", "number_days",
    "inc_cum" and "output". Rows whose "Forecast_Backcast" value contains
    "Backcast" are removed, and "number_days" is shifted so that it counts days
    after the last backcast day. A "target" column of the form
    "<number_days> days ahead <inc_cum> <output>" is added.

    Args:
        filename: Path of the quantiles CSV file to read.
        solution_string_mapping: Optional {old: new} replacements applied to
            whole values of the "output" column, in dictionary order.
        forecast_start_date: Optional forecast start date as "YYYY-MM-DD".
            When given, "forecast_date" and "target_end_date"
            (forecast_date + number_days) columns are added.
        location: Optional value used to fill a new "location" column.
        drop_column_names: Optional column names to drop from the result.

    Returns:
        The reformatted forecast rows as a new DataFrame.

    Raises:
        ValueError: If forecast_start_date does not match "%Y-%m-%d".
    """
    q_ensemble_data = pd.read_csv(filename)

    # Rows that fall inside the period for which data is available.
    # na=False treats a missing label as "not a backcast" instead of breaking the mask.
    is_backcast = q_ensemble_data["Forecast_Backcast"].str.contains("Backcast", na=False)

    # Number of days for which data is available; zero when the file holds
    # forecast rows only (max() of an empty selection would otherwise fail).
    if is_backcast.any():
        number_data_days = q_ensemble_data.loc[is_backcast, "number_days"].max()
    else:
        number_data_days = 0

    # Drop the backcast rows; copy so the column assignments below operate on
    # an independent frame rather than a slice of the one read from disk.
    q_ensemble_data = q_ensemble_data.loc[~is_backcast].copy()

    # Subtract the number of backcast days from number_days
    q_ensemble_data["number_days"] = q_ensemble_data["number_days"] - number_data_days

    # Rename outputs according to the user-provided strings
    if solution_string_mapping:
        for old_name, new_name in solution_string_mapping.items():
            q_ensemble_data["output"] = q_ensemble_data["output"].replace(old_name, new_name)

    # Create the target column
    q_ensemble_data["target"] = (
        q_ensemble_data["number_days"].astype("string")
        + " days ahead "
        + q_ensemble_data["inc_cum"]
        + " "
        + q_ensemble_data["output"]
    )

    # Add dates; a malformed start date raises here instead of being silently
    # kept as a string and failing later during the date arithmetic.
    if forecast_start_date:
        forecast_date = pd.to_datetime(forecast_start_date, format="%Y-%m-%d")
        q_ensemble_data["forecast_date"] = forecast_date
        q_ensemble_data["target_end_date"] = forecast_date + pd.to_timedelta(
            q_ensemble_data["number_days"].astype(int), unit="D"
        )

    # Add location column
    if location:
        q_ensemble_data["location"] = location

    # Drop the columns specified by the user
    if drop_column_names:
        q_ensemble_data = q_ensemble_data.drop(columns=list(drop_column_names))

    return q_ensemble_data

In [51]:
# Input: quantiles saved by the calibrated ensemble run; output: the same forecast in CDC layout
calibrated_quantiles_csv = os.path.join(DEMO_PATH, "results_petri_ensemble/calibrated_quantile_results.csv")
cdc_quantiles_csv = os.path.join(DEMO_PATH, "results_petri_ensemble/CDCformat_calibrated_quantile_results.csv")

# Columns removed from the table before it is written out
helper_columns = ["timepoint_id", "number_days", "inc_cum", "output", "Forecast_Backcast"]

q_ensemble_data = cdc_reformatcsv(
    filename=calibrated_quantiles_csv,
    solution_string_mapping={"Infected": "case", "Hospitalizations": "hosp.", "D": "death"},
    forecast_start_date="2023-08-03",
    location="US",
    drop_column_names=helper_columns,
)
q_ensemble_data.to_csv(cdc_quantiles_csv, index=False)