# This is a notebook for synthesizing data to test calibration

In order to check that `calibrate` is returning a result that makes sense, we are going to:  

1. `sample` a model
2. use that output to generate synthetic data
3. then calibrate the model to that synthetic dataset
4. sanity check that the calibrated parameters and results are reasonably close to the parameters used to create the synthetic data

See [this issue](https://github.com/ciemss/pyciemss/issues/448).

### Load dependencies

In [13]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyciemss
from pyciemss.interfaces import calibrate

### Collect model and data paths

In [2]:
# Base URL of the hosted model files. It ends with "/" so that file names can
# be appended to it directly.
MODEL_PATH = "https://raw.githubusercontent.com/DARPA-ASKEM/simulation-integration/main/data/models/"
# Relative path to the docs source directory (not referenced by the cells shown here).
DATA_PATH = "../../../docs/source/"

# Models: one petrinet, one regnet and one stockflow JSON file
petri1 = f"{MODEL_PATH}SEIRHD_with_reinfection01_petrinet.json"
regnet1 = f"{MODEL_PATH}LV_rabbits_wolves_model02_regnet.json"
stock1 = f"{MODEL_PATH}SEIRHDS_stockflow.json"

### Set parameters for sampling

In [3]:
# Simulation window passed to `pyciemss.sample`, and the spacing between
# logged time points.
start_time, end_time = 0.0, 100.0
logging_step_size = 10.0

# One sample per model is enough: that single trajectory is the basis for the
# synthetic data.
num_samples = 1

## (1) Sample each model

In [4]:
# Sample the petrinet model; the "data" entry of the result is displayed below.
petri_result = pyciemss.sample(
    petri1,
    end_time,
    logging_step_size,
    num_samples,
    start_time=start_time,
)
petri_result["data"]

Unnamed: 0,timepoint_id,sample_id,persistent_beta_param,persistent_gamma_param,persistent_hosp_param,persistent_death_hosp_param,persistent_I0_param,D_state_state,E_state_state,H_state_state,I_state_state,R_state_state,S_state_state,infected_observable_state,exposed_observable_state,hospitalized_observable_state,dead_observable_state
0,0,0,0.530644,0.368819,0.029934,0.025677,13.79646,0.037612,60.16711,1.364239,35.096813,92.005089,19339856.0,35.096813,60.16711,1.364239,0.037612
1,1,0,0.530644,0.368819,0.029934,0.025677,13.79646,0.139577,109.210358,2.692355,63.724625,262.970276,19339602.0,63.724625,109.210358,2.692355,0.139577
2,2,0,0.530644,0.368819,0.029934,0.025677,13.79646,0.329472,198.248413,4.916741,115.679718,570.920105,19339150.0,115.679718,198.248413,4.916741,0.329472
3,3,0,0.530644,0.368819,0.029934,0.025677,13.79646,0.674837,359.85675,8.929051,209.982895,1127.425171,19338368.0,209.982895,359.85675,8.929051,0.674837
4,4,0,0.530644,0.368819,0.029934,0.025677,13.79646,1.301821,653.132751,16.207748,381.12616,2135.088135,19336854.0,381.12616,653.132751,16.207748,1.301821
5,5,0,0.530644,0.368819,0.029934,0.025677,13.79646,2.439796,1185.188354,29.414717,691.637085,3961.471924,19334168.0,691.637085,1185.188354,29.414717,2.439796
6,6,0,0.530644,0.368819,0.029934,0.025677,13.79646,4.504788,2149.895508,53.369637,1254.730591,7272.960938,19329282.0,1254.730591,2149.895508,53.369637,4.504788
7,7,0,0.530644,0.368819,0.029934,0.025677,13.79646,8.250609,3897.30542,96.787956,2274.964844,13276.405273,19320482.0,2274.964844,3897.30542,96.787956,8.250609
8,8,0,0.530644,0.368819,0.029934,0.025677,13.79646,15.04094,7056.657715,175.381012,4120.488281,24153.117188,19304518.0,4120.488281,7056.657715,175.381012,15.04094


In [5]:
# Sample the regnet model; the "data" entry of the result is displayed below.
regnet_result = pyciemss.sample(
    regnet1,
    end_time,
    logging_step_size,
    num_samples,
    start_time=start_time,
)
regnet_result["data"]

Unnamed: 0,timepoint_id,sample_id,persistent_alpha_param,persistent_gamma_param,persistent_beta_param,persistent_delta_param,persistent_R0_param,persistent_W0_param,Rabbits_state_state,Wolves_state_state
0,0,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,0.321615,0.005714
1,1,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,6.372262,0.000586
2,2,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,0.113902,0.103626
3,3,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,2.174439,0.000157
4,4,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,0.082777,2.143745
5,5,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,0.742097,0.000782
6,6,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,14.264984,0.341125
7,7,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,0.254202,0.01059
8,8,0,1.004276,2.91725,1.051116,1.062666,10.963837,5.626074,5.027248,0.000281


In [6]:
# Sample the stockflow model; the "data" entry of the result is displayed below.
stockflow_result = pyciemss.sample(
    stock1,
    end_time,
    logging_step_size,
    num_samples,
    start_time=start_time,
)
stockflow_result["data"]

Unnamed: 0,timepoint_id,sample_id,persistent_p_cbeta_param,persistent_p_cdelta_param,persistent_p_cgamma_param,persistent_p_hosp_param,persistent_p_death_param,persistent_p_los_param,persistent_p_roil_param,persistent_I0_param,persistent_H0_param,D_state_state,E_state_state,H_state_state,I_state_state,R_state_state,S_state_state
0,0,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,0.072314,6.360102,2.047779,5.118945,10.452667,976.94812
1,1,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,0.134092,14.177917,2.750715,11.563018,27.993053,944.38092
2,2,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,0.242334,28.742546,5.460753,24.24683,64.787735,877.518921
3,3,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,0.457476,48.568966,10.519651,43.764771,135.970215,761.718628
4,4,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,0.836458,61.576202,16.936558,61.670853,249.110291,610.869446
5,5,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,1.371315,55.997601,21.102896,63.699406,384.119995,474.708557
6,6,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,1.952692,39.112293,20.183714,49.639233,500.995605,389.116577
7,7,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,2.45128,23.764069,15.622018,32.178925,575.933472,351.050201
8,8,0,0.439759,0.261867,0.24233,0.180887,0.022249,8.062474,0.005127,4.246541,3.632898,2.810591,13.898696,10.530762,19.218241,611.224915,343.316833


## (2) Add noise to generate synthetic data

In [9]:
# Keep only the I, H and D state columns of the petrinet sample; these are the
# trajectories that noise is added to in the next cell.
petri_data_df = petri_result["data"].loc[
    :, ["I_state_state", "H_state_state", "D_state_state"]
]
petri_data_df

Unnamed: 0,I_state_state,H_state_state,D_state_state
0,35.096813,1.364239,0.037612
1,63.724625,2.692355,0.139577
2,115.679718,4.916741,0.329472
3,209.982895,8.929051,0.674837
4,381.12616,16.207748,1.301821
5,691.637085,29.414717,2.439796
6,1254.730591,53.369637,4.504788
7,2274.964844,96.787956,8.250609
8,4120.488281,175.381012,15.04094


In [15]:
# Take results from `sample` and add different levels of Gaussian noise to generate synthetic data
def add_gaussian_noise(data: pd.DataFrame, std_dev: float, seed=None) -> pd.DataFrame:
    """Return a new frame equal to `data` plus zero-mean Gaussian noise.

    One independent noise value is drawn for every cell of `data`; the input
    frame itself is not modified.

    Parameters
    ----------
    data : pd.DataFrame
        Numeric values to perturb.
    std_dev : float
        Standard deviation of the noise. It is absolute (in the same units as
        `data`), not relative to the magnitude of each value, so entries that
        are small compared to `std_dev` can become negative.
    seed : int, optional
        When given, the noise is drawn from a fresh `np.random.default_rng(seed)`
        so the result is reproducible. When omitted, the noise is drawn from
        NumPy's global random state, exactly as before.

    Returns
    -------
    pd.DataFrame
        Frame with the same shape, index and columns as `data`.
    """
    if seed is None:
        # Backward-compatible path: rely on NumPy's global random state.
        noise = np.random.normal(0, std_dev, size=data.shape)
    else:
        # Dedicated generator: reproducible and independent of global state.
        noise = np.random.default_rng(seed).normal(0, std_dev, size=data.shape)
    return data + noise

# Seed NumPy's global random state right before drawing the noise so that the
# synthetic dataset is the same on every Restart & Run All.
np.random.seed(0)
# The noise standard deviation (0.1) is absolute, in the same units as the states.
petri_noisy_data = add_gaussian_noise(petri_data_df, 0.1)
petri_noisy_data

Unnamed: 0,I_state_state,H_state_state,D_state_state
0,35.140551,1.250151,0.082774
1,63.698796,2.607929,-0.014256
2,115.711542,5.060911,0.525089
3,210.14105,8.829876,0.676045
4,381.195562,16.223516,1.255973
5,691.66445,29.40339,2.549931
6,1254.698601,53.28507,4.550166
7,2274.934835,96.727202,8.214578
8,4120.404414,175.330924,14.892642


In [None]:
# TODO: Calibrate the model to the synthetic dataset

In [None]:
# TODO: Sanity check: compare the calibrated parameters to the original ones