# This is a notebook for synthesizing data to test calibration

In order to check that `calibrate` is returning a result that makes sense, we are going to:  

1. `sample` a model
2. use that output to generate synthetic data
3. then calibrate the model to that synthetic dataset
4. sanity check that the parameters/results are reasonable compared to the parameters used to create the synthetic data

See [this issue](https://github.com/ciemss/pyciemss/issues/448).

### Load dependencies

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyciemss
from pyciemss.interfaces import calibrate

### Collect model and data paths

In [None]:
# Base locations. Both strings end in "/" so that a file name can be appended directly.
MODEL_PATH = "https://raw.githubusercontent.com/DARPA-ASKEM/simulation-integration/main/data/models/"
DATA_PATH = "../../docs/source/"

# Models (full URLs, one per model file)
petri1 = f"{MODEL_PATH}SEIRHD_with_reinfection01_petrinet.json"
regnet1 = f"{MODEL_PATH}LV_rabbits_wolves_model02_regnet.json"
stock1 = f"{MODEL_PATH}SIR_stockflow.json"
stock2 = f"{MODEL_PATH}SEIRHDS_stockflow.json"

### Set parameters for sampling

In [None]:
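# What is logging_step_size? It is passed to `pyciemss.sample` as the spacing between the time points at which the simulated trajectory is logged (see the `sample` API docs).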

In [None]:
# Simulation window (start and end) and the step size passed to `pyciemss.sample`.
start_time, end_time = 0.0, 150.0
logging_step_size = 10.0

### Define functions for generating synthetic data

In [None]:
# Function to add Gaussian noise to `sample` results
def add_gaussian_noise(data: pd.DataFrame, std_dev: float, col_state_map: dict) -> pd.DataFrame:
    """Return a noisy, renamed copy of ``data`` with a leading ``Timestamp`` column.

    Args:
        data: Numeric frame to perturb; it is not modified.
        std_dev: Standard deviation of the zero-mean Gaussian noise added to
            every cell of ``data``.
        col_state_map: Mapping from column names in ``data`` to the names they
            should carry in the returned frame.

    Returns:
        A new frame whose first column, ``Timestamp``, holds the index of
        ``data`` cast to float, followed by the perturbed and renamed columns.
    """
    perturbed = data + np.random.normal(loc=0, scale=std_dev, size=data.shape)
    # The timestamps are taken from the frame's index, not from a data column.
    perturbed.insert(0, 'Timestamp', perturbed.index.astype(float))
    renames = {'Timestamp': 'Timestamp'}
    renames.update(col_state_map)
    return perturbed.rename(columns=renames)

# Function to sample from a model and generate synthetic data
def synthetic_data(model, col_state_map, end_time, logging_step_size, noise_level):
    """Sample ``model`` once and turn the selected outputs into a synthetic dataset.

    The noisy dataset is also written to ``noisy_data.csv`` in the current
    working directory (without the index column).

    Args:
        model: Model passed through to ``pyciemss.sample``.
        col_state_map: Mapping from columns of the sample output to the column
            names to use in the synthetic dataset; only these columns are kept.
        end_time: End time passed to ``pyciemss.sample``.
        logging_step_size: Logging step size passed to ``pyciemss.sample``.
        noise_level: Standard deviation of the Gaussian noise added to the
            sampled values.

    Returns:
        The noisy dataset, as returned by ``add_gaussian_noise``.
    """
    num_samples = 1  # a single trajectory is enough to serve as "observed" data
    result = pyciemss.sample(model, end_time, logging_step_size, num_samples)
    data_df = result["data"][list(col_state_map)]
    noisy_data = add_gaussian_noise(data_df, noise_level, col_state_map)
    # Use the local `noisy_data`; the previous `petri_noisy_data` was never
    # defined and raised NameError on every call.
    noisy_data.to_csv('noisy_data.csv', index=False)
    return noisy_data

# TODO: make_plot=True

## (1) Create synthetic data from a given model

In [None]:
# Keys are columns of the `sample` output; values are the column names used in the synthetic dataset.
col_state_map = {
    'I_state_state': 'Cases',
    'H_state_state': 'Hosp',
    'D_state_state': 'Deaths',
}
# Passed to `synthetic_data` as the standard deviation of the added Gaussian noise.
noise_level = 0.0
synthetic_data(
    model=petri1,
    col_state_map=col_state_map,
    end_time=end_time,
    logging_step_size=logging_step_size,
    noise_level=noise_level,
)

## (2) Calibrate the model to the synthetic data

In [None]:
# data_mapping has the form {"column_name": "observable/state_variable"}
data_mapping = {"Cases": "I", "Hosp": "H", "Deaths": "D"}
num_iterations = 100
# `synthetic_data` writes 'noisy_data.csv' to the current working directory,
# not to DATA_PATH, so read the dataset from the same place.
dataset = "noisy_data.csv"

calibrated_results = calibrate(petri1, dataset, data_mapping=data_mapping, num_iterations=num_iterations)
parameter_estimates = calibrated_results["inferred_parameters"]
calibrated_results

In [None]:
# Call the `inferred_parameters` object returned by `calibrate` and display the result
# (presumably one draw of parameter values from the fitted posterior — TODO confirm).
parameter_estimates()

In [None]:
# Number of trajectories to draw from the calibrated model. No earlier cell defines
# `num_samples` at notebook scope (it is only a local inside `synthetic_data`),
# so set it here; adjust as needed.
num_samples = 100
# Sample the model that was actually calibrated (`petri1`); `model1` was never defined.
calibrated_sample_results = pyciemss.sample(
    petri1,
    end_time,
    logging_step_size,
    num_samples,
    start_time=start_time,
    inferred_parameters=parameter_estimates,
)
calibrated_sample_results

In [None]:
# Sanity check: compare calibrated parameters to original