In [None]:
from summer2 import CompartmentalModel
from summer2.parameters import Parameter
import arviz as az
import Calibrate as cal
import seaborn as sns
from jax.scipy.stats import gaussian_kde
from jax import lax

import jax.numpy as jnp

import numpy as np
import matplotlib.pyplot as plt

import pymc as pm
from estival.wrappers import pymc as epm

import numpyro
from numpyro import infer
from numpyro import distributions as dist
from jax import random
import pickle
from scipy.special import kl_div, rel_entr

In [None]:
def build_model():
    """Assemble the S-I-R compartmental model used throughout this notebook.

    The model spans times 0.0 to 100.0 with compartments "S", "I" and "R", and
    passes ["I"] as the third constructor argument. It starts with 999 people
    in "S" and 1 in "I". Two flows are defined, both driven by named
    parameters that are supplied at run time:

    * "infection": S -> I, using Parameter("contact_rate")
    * "recovery":  I -> R, using Parameter("recovery_rate")

    The "infection" flow is additionally requested as the derived output
    "incidence".

    Returns:
        The configured CompartmentalModel (not yet run).
    """
    time_span = [0.0, 100.0]
    compartments = ["S", "I", "R"]
    infectious = ["I"]

    model = CompartmentalModel(time_span, compartments, infectious)
    model.set_initial_population({"S": 999.0, "I": 1.0})

    # Flows: parameter values are looked up by name when the model is run
    model.add_infection_frequency_flow(
        "infection", Parameter("contact_rate"), "S", "I"
    )
    model.add_transition_flow(
        "recovery", Parameter("recovery_rate"), "I", "R"
    )

    # Expose the infection flow under the name used by the rest of the notebook
    model.request_output_for_flow("incidence", "infection")

    return model

# Notebook-level model instance; the forward runs and the refit below are all built on it.
sir_model = build_model()

In [None]:
# Default parameter values passed to every BayesianCompartmentalModel below.
# "contact_rate" is the parameter that is sampled / calibrated later on, so these
# defaults presumably only matter for "recovery_rate" there — TODO confirm.
parameters = {
    "contact_rate": 0.3,
    "recovery_rate": 0.1
}
# Single forward run at the default values, as a quick check that the model runs.
sir_model.run(parameters)
# NOTE: the name `res` is rebound to the multi-run results table further down the notebook.
res = sir_model.get_derived_outputs_df()
# res['incidence'].plot()

# Sample from a known distribution

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import truncnorm

def sample_from_truncnorm(mean, std_dev, lower_bound, upper_bound, sample_size, name, random_state=None):
    """Draw samples from a normal distribution truncated to [lower_bound, upper_bound].

    Args:
        mean: Location of the underlying (untruncated) normal distribution.
        std_dev: Scale of the underlying normal distribution; must be > 0.
        lower_bound: Lower truncation limit, in the same units as ``mean``.
        upper_bound: Upper truncation limit; must be greater than ``lower_bound``.
        sample_size: Number of samples to draw.
        name: Column name of the returned DataFrame.
        random_state: Optional seed or NumPy random generator forwarded to
            ``truncnorm.rvs`` so that a draw can be reproduced. ``None`` (the
            default) uses SciPy's global random state, i.e. an unseeded draw.

    Returns:
        A single-column ``pandas.DataFrame`` named ``name`` holding the samples.

    Raises:
        ValueError: If ``std_dev`` is not positive or the bounds are not ordered.
    """
    if std_dev <= 0:
        raise ValueError(f"std_dev must be positive, got {std_dev}")
    if lower_bound >= upper_bound:
        raise ValueError(
            f"lower_bound ({lower_bound}) must be smaller than upper_bound ({upper_bound})"
        )

    # scipy expresses the truncation limits in standard-deviation units relative to the mean
    a = (lower_bound - mean) / std_dev
    b = (upper_bound - mean) / std_dev
    samples = truncnorm.rvs(
        a, b, loc=mean, scale=std_dev, size=sample_size, random_state=random_state
    )

    return pd.DataFrame(samples, columns=[name])

### Generate a new sample

In [None]:
# Synthetic "true" contact-rate sample: a mixture of two truncated normals,
# 5000 draws restricted to [0.15, 0.25] followed by 10000 draws restricted to [0.25, 0.35],
# concatenated into one single-column frame stored under the key "contact_rate".
# NOTE(review): the first component's mean (0.07) lies 16 standard deviations
# (std_dev = 0.005) below its truncation window [0.15, 0.25], so its draws come from the
# far upper tail only — confirm this is intended and not a typo.
# NOTE: no seed is passed, so this cell produces a different sample on every run.
samples = {
    "contact_rate":  pd.concat(
        [
            sample_from_truncnorm(0.07, 0.005, 0.15, 0.25, 5000, "contact_rate"),
            sample_from_truncnorm(0.3, 0.013, 0.25, 0.35, 10000, "contact_rate"),
        ],       
        ignore_index=True
    )
}
# #Storing our samples for later use
# samples
# with open("./true_sample_severe.pkl", 'wb') as fp:
        # pickle.dump(samples['contact_rate'], fp)


### Or load the samples used in the paper (mainly for comparing results)

In [None]:
#Load samples used in the paper
# NOTE: this rebinds `samples`, replacing whatever the generation cell above produced.
# Later cells use both samples["contact_rate"] and samples.to_numpy(), so the pickle
# presumably holds a DataFrame with a "contact_rate" column — TODO confirm.
# pickle can execute arbitrary code while loading: only open files you trust.
with open("./true_sample.pkl", 'rb') as fp:
       samples = pickle.load(fp)  

In [None]:
# Kernel density estimate of the "true" contact-rate sample.
sns.kdeplot(samples["contact_rate"], fill=True)

# Run model forward (i.e. feed the samples to the model)

In [None]:
from estival.model import BayesianCompartmentalModel
import estival.priors as esp
import estival.targets as est
from estival.sampling import tools as esamp


# Forward-run model: no targets are attached, so nothing is fitted here; the model is only
# used to push the known contact-rate samples through the SIR model.
# NOTE(review): the prior range [0.0, 0.1] does not cover the values produced by the
# generation cell above (0.15-0.35) — presumably irrelevant for forward runs; confirm.
priors = [
    esp.UniformPrior("contact_rate", [0.0, 0.1]),
]
targets = []
bcm = BayesianCompartmentalModel(model=sir_model,priors=priors, targets=targets,parameters=parameters)
# One {"contact_rate": value} dict per row of the sample, taken by position.
samples_for_estival = [{"contact_rate": samples["contact_rate"].iloc[i]} for i in range(len(samples["contact_rate"]))]


# Run the model for every parameter dict and collect the results.
model_runs = esamp.model_results_for_samples(samples_for_estival, bcm)

In [None]:
# Incidence output of the forward runs (presumably one line per sample); legend suppressed.
model_runs.results['incidence'].plot(legend=False)

## Collect the synthetic data and generate likelihood components

In [None]:
# Observation times: 10, 20, ..., 90.
data_times = list(range(10, 91, 10))
# For each observation time, fit a Gaussian KDE to the forward-run incidence values at that
# time (bw_method=0.01 is the bandwidth setting handed to gaussian_kde). These KDEs are
# evaluated later as the likelihood components of the refit.
likelihood_comps = {t: gaussian_kde(jnp.array(model_runs.results['incidence'].loc[t]), bw_method=0.01) for t in data_times}

In [None]:
# Check the likelihood components: for every observation time, draw the KDE evaluated on a
# fixed grid of 1000 points over [0, 50] on top of a density-normalised histogram of the
# incidence values it was fitted to, one figure per time.
for t in data_times:
    kde = likelihood_comps[t]
    x_values = np.linspace(0, 50, 1000)
    pdf_values = kde(x_values)
    plt.plot(x_values, pdf_values)

    model_runs.results['incidence'].loc[t].plot.hist(density=True, bins=50)
    plt.show()

# Refit the model using the likelihood components derived from synthetic data

In [None]:
# Flat prior
# NOTE: rebinds `priors`, replacing the list defined for the forward-run model above.
priors = [
    esp.UniformPrior("contact_rate", [0.1, 0.5]),
]
# Number of observation times; eval_func divides each log-likelihood component by it.
n_data_points = len(data_times)
# Define a custom target using the likelihood components
def make_eval_func(t):
    """Build the evaluation function for the likelihood component at time ``t``.

    Args:
        t: Observation time; must be a key of ``likelihood_comps``.

    Returns:
        A function ``eval_func(modelled, obs, parameters, time_weights)`` returning the
        log of the KDE density at ``modelled``, divided by ``n_data_points``. Only
        ``modelled`` is used; the other arguments are accepted and ignored.
    """
    def eval_func(modelled, obs, parameters, time_weights):
        likelihood_comp = likelihood_comps[t](modelled)
        # Floor the density so the log below stays finite. The literal 1e-300 is only
        # representable in float64: in float32 (JAX's default dtype) it rounds to 0.0 and
        # would not guard against log(0). Fall back to the smallest positive normal number
        # of the dtype actually in use; under float64 the floor is still exactly 1e-300.
        floor = max(1.e-300, float(jnp.finfo(likelihood_comp.dtype).tiny))
        likelihood_comp = jnp.max(jnp.maximum(likelihood_comp, floor))
        return jnp.log(likelihood_comp) / n_data_points

    return eval_func

# One custom target per observation time, read from the model's 'incidence' output.
# The pd.Series([0.], index=[t]) passed as data carries the time t as its index; its value
# is a placeholder, since eval_func never reads its `obs` argument.
targets = [est.CustomTarget(f"likelihood_comp_{t}", pd.Series([0.], index=[t]), make_eval_func(t), model_key='incidence') for t in data_times]

# Model to calibrate: same SIR model, flat prior on "contact_rate", KDE-based targets.
refit_bcm = BayesianCompartmentalModel(model=sir_model,priors=priors, targets=targets,parameters=parameters)

# Sampling 
Running sampling algorithms on the refitted model in order to recover the initial samples.

### Pymc sampler

In [None]:
chains = 4
# Bounds of the flat prior on "contact_rate" used by the refit model ([0.1, 0.5]).
# Starting values must lie inside the prior's support; a start outside it has zero
# prior density.
init_low, init_high = 0.1, 0.5
# One random starting point per chain. These are only used if the `initial_params`
# argument of the sampler call is uncommented.
init_vals = [{"contact_rate": np.random.uniform(init_low, init_high)} for _ in range(chains)]


In [None]:
# NOTE: IDATA is created here but not used anywhere else in the visible notebook.
IDATA = dict()
# Accumulator for the per-sampler result tables.
results_df = pd.DataFrame()

Draws = [1000]*4 # Increase the number of draws for better accuracy
# The four PyMC samplers being compared; each is run with the matching entry of Draws.
Sampler = [pm.sample_smc,pm.Metropolis, pm.DEMetropolis, pm.DEMetropolisZ]
for sampler, draws in zip(Sampler, Draws):
    # NOTE: chains and cores are hard-coded to 4 here rather than taken from `chains` above.
    results = cal.Single_analysis(sampler = sampler, 
            draws = draws,
            chains=4,
            cores = 4,
            tune = 100,
            bcm_model = refit_bcm,
            # initial_params = init_vals
)
            
    # Append this sampler's results below the ones gathered so far.
    results_df = pd.concat([results_df,results])



# Replace the concatenated index with a clean 0..N-1 index.
results_df = results_df.reset_index(drop=True)

# with open('./Results/Reverse_Ingineering/Exper_3_severe_trough.pkl', 'wb') as fp:
#     pickle.dump(results_df, fp)

### NUTS sampling (Numpyro)

We first need to define a NumPyro-compatible model. Only the parameter "contact_rate" is involved here.

In [16]:
def nmodel():
    """NumPyro model exposing the refit log-likelihood to NUTS.

    Samples "contact_rate" from a Uniform(0.0, 1.0) prior and adds
    ``refit_bcm.loglikelihood`` evaluated at that value to the joint
    log-density through a ``numpyro.factor`` site named "ll".

    NOTE(review): this prior is wider than the
    UniformPrior("contact_rate", [0.1, 0.5]) given to ``refit_bcm`` — confirm
    the difference is intended when comparing NUTS with the other samplers.
    """
    contact_rate = numpyro.sample("contact_rate", dist.Uniform(0.0, 1.0))
    numpyro.factor("ll", refit_bcm.loglikelihood(contact_rate=contact_rate))

#Initialisation
# init_vals_nuts = {"contact_rate": jnp.full(4, 0.26) }

#init_vals_nuts = {"contact_rate": jnp.array(np.random.uniform(0.,.6, 4)) }

    

In [18]:
# Run NUTS through the same analysis helper, passing the NumPyro model defined above.
sampler = infer.NUTS
results = cal.Single_analysis(sampler = sampler, 
            draws = 1000, #Increase this number for better accuracy
            chains=4,
            cores = 4,
            tune = 100,
            bcm_model = refit_bcm,
            nmodel=nmodel,
            # initial_params = init_vals_nuts

    )
# Append the NUTS results to the PyMC results.
# NOTE: this cell is not idempotent — each re-run appends another NUTS block to results_df.
results_df = pd.concat([results_df,results])
results_df = results_df.reset_index(drop=True)

In [20]:
# Display the combined result table (PyMC samplers followed by NUTS).
results_df

In [None]:
# Bar-chart comparison of the samplers, drawn by the project's Calibrate helper.
cal.plot_comparison_bars(results_df=results_df)

# Multi-run analysis

Please refer to fitting_simulation_script.py for the 100 runs of each algorithm.

Here we compute summary statistics from these runs.

## Loading previous results
We load the results produced by the Python script mentioned above.
Make sure the path below points to your own copy of the results file; the path used here is only an example, because the file is too large to be uploaded to the GitHub repository.

In [None]:
# Load the multi-run results once; the file is large, so it should not be read more often
# than needed. pd.read_pickle opens and closes the file itself.
# pickle can execute arbitrary code while loading: only open result files you trust.
all_results = pd.read_pickle("./Results/Reverse_Ingineering/Multi_run_all.pkl") #It's a dict

# Stack the per-sampler frames into a single DataFrame; when given a dict, pd.concat uses
# the dict keys as the outer level of the resulting index.
res = pd.concat(all_results) #To a pd.DataFrame


In [21]:
#Computing the Relative ESS
# Relative ESS = Min_Ess / (Draws * Chains). The columns are cast to float first.
# This presumably expresses the minimum ESS as a fraction of all draws — confirm that
# "Draws" is per chain.
res["Rel_Ess"] = res['Min_Ess'].astype(float)/(res["Draws"].astype(float)*res['Chains'].astype(float))

In [None]:
# Summary of the multi-run table, computed by the project's Calibrate helper; judging by the
# names, it returns mean values and a success percentage — see cal.group_summary for details.
summary_mean, prcnt_succ = cal.group_summary(res)

### Computing the Kullback-Leibler divergence against the known distribution

In [23]:
# kl_div_threshold = 0.01
def Kullback_Leibler_div(all_results, true_sample):
    """Attach a Kullback-Leibler divergence to every run in ``all_results``.

    For each run, the last ``len(true_sample)`` posterior draws of
    "contact_rate" are taken. Both that vector and the reference sample are
    divided by their own sum, and ``scipy.special.kl_div`` is summed over the
    paired elements. The result, rounded to 7 decimals, is written to a
    "KL_div" column.

    NOTE(review): the divergence pairs the i-th reference value with the i-th
    retained draw, so it depends on the ordering of both vectors; it is not a
    divergence between estimated densities. Confirm this is the intended
    metric.

    Args:
        all_results: Mapping from sampler name to a DataFrame with a "Trace"
            column. Each trace must expose
            ``posterior.to_dataframe()["contact_rate"]``.
        true_sample: Reference sample (DataFrame, Series or array-like of
            numbers); it is flattened to one dimension.

    Returns:
        The same ``all_results`` mapping. Its DataFrames are modified in place
        (a "KL_div" column is added or overwritten).

    Raises:
        ValueError: If a trace holds fewer draws than the reference sample.
    """
    # Use the sample handed in by the caller rather than a notebook-level global
    reference = np.asarray(true_sample, dtype=np.float64).reshape(-1)
    n_reference = reference.shape[0]
    reference = reference / np.sum(reference)

    for sampler in all_results.keys():
        df = all_results[sampler]
        df["KL_div"] = np.nan  # float placeholder, overwritten for every row below

        for row in df.index:
            trace = df.Trace.loc[row]
            predicted = np.array(trace.posterior.to_dataframe()["contact_rate"].to_list())
            if predicted.shape[0] < n_reference:
                raise ValueError(
                    f"Trace of sampler {sampler!r} (row {row!r}) has {predicted.shape[0]} draws, "
                    f"fewer than the {n_reference} reference samples"
                )
            # Keep only the last n_reference draws so both vectors have the same length
            predicted = predicted[-n_reference:]
            predicted = predicted / np.sum(predicted)

            df.at[row, "KL_div"] = np.sum(kl_div(reference, predicted)).round(7)
        all_results[sampler] = df
    return all_results

In [None]:
# Despite its name, `df` is the dict of per-sampler DataFrames returned by
# Kullback_Leibler_div (the same mapping object as all_results), not a single DataFrame.
df = Kullback_Leibler_div(all_results,samples)


### Selecting min KL_div for each sampler over 100 runs

In [None]:
#____Selecting min KL_div for each sampler over 100 runs
# Gather the best row of every sampler in a list and concatenate once, instead of growing a
# DataFrame inside the loop.
best_rows = []
for sampler in df.keys():
    temp = df[sampler]
    # Double brackets keep the selection as a one-row DataFrame rather than a Series
    best_kldiv = temp.loc[[temp["KL_div"].idxmin()]]
    best_rows.append(best_kldiv)

# pd.concat refuses an empty list, so fall back to an empty frame when there are no samplers
best_results = pd.concat(best_rows) if best_rows else pd.DataFrame()
best_results = best_results.reset_index(drop=True)
#Computing the Relative ESS
best_results["Rel_Ess"] = best_results['Min_Ess'].astype(float)/(best_results["Draws"].astype(float)*best_results['Chains'].astype(float))

In [None]:
# Display the best (minimum KL_div) run of each sampler.
best_results

In [29]:
def plot_bars(results_df):
    """Draw a four-panel bar chart comparing samplers.

    One panel each for "Ess_per_sec", "Rel_Ess", "Rhat_max" and "KL_div", with
    the "Sampler" column on the x axis. The figure is created on the current
    matplotlib backend and is not returned.

    Args:
        results_df: DataFrame with the columns "Sampler", "Ess_per_sec",
            "Rel_Ess", "Rhat_max" and "KL_div".

    Side effects:
        Sets ``pd.options.plotting.backend`` to "matplotlib".
    """
    pd.options.plotting.backend = "matplotlib"

    # (column to plot, panel title, lower y-limit or None to keep the default)
    panels = [
        ("Ess_per_sec", "Ess_per_sec", None),
        ("Rel_Ess", "Relative ESS", None),
        ("Rhat_max", r"Max $\hat{R}$", 1),
        ("KL_div", "Min KL_div", 0.005),
    ]

    fig, axes = plt.subplots(1, len(panels), figsize=(10, 5))
    for ax, (column, title, y_min) in zip(axes, panels):
        results_df.plot.bar(y=column, x="Sampler", ax=ax, legend=False)
        ax.set_title(title)
        ax.set_xlabel("")
        if y_min is not None:
            ax.set_ylim(y_min)
        # Rotate this panel's own tick labels. plt.xticks() would act on pyplot's
        # current axes, which is not necessarily `ax`.
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    fig.suptitle("Sampler Comparison", fontsize=12)
    fig.tight_layout()


In [None]:
# Compare the best run of each sampler.
plot_bars(best_results)

### Plotting the trace for the best results

In [None]:
# Trace plot of the best run of every sampler, one figure per sampler, preceded by its name.
# NOTE: `idata` and `sampler` stay defined after the loop; later cells read `idata`.
for idata, sampler in zip(best_results.Trace, best_results.Sampler):
    print(sampler)
    az.plot_trace(idata,figsize=(9,4))
    plt.show()

### Posterior distribution Comparison

In [None]:
# One panel per sampler in best_results, each overlaying the KDE of the "true" contact-rate
# sample with the KDE of that sampler's best posterior. The panel count follows the number of
# samplers; at least one panel is created because plt.subplots needs a positive count.
n_panels = max(len(best_results), 1)
# squeeze=False always returns a 2-D array of axes, even for a single panel
fig, axes = plt.subplots(1, n_panels, figsize=(18, 3), squeeze=False)

# NOTE: `sampler` and `idata` are notebook-level names on purpose: the cells that follow read
# the `idata` left over from the last iteration.
for ax, sampler, idata in zip(axes[0], best_results.Sampler, best_results.Trace):
    posterior_sample = idata.posterior.to_dataframe()['contact_rate'].to_list()
    # Plot the "contact_rate" column explicitly, as the first KDE plot of this notebook does,
    # so the curve receives the "true sample" label.
    sns.kdeplot(samples["contact_rate"], ax=ax, fill=True, label="true sample")
    sns.kdeplot(posterior_sample, ax=ax, fill=True, label=sampler)
    ax.legend(loc="upper left")

fig.suptitle("Posterior by different MCMC samplers", fontsize=12)
fig.tight_layout()


In [None]:
# lls = esamp.likelihood_extras_for_idata(idata, refit_bcm)
# NOTE: `idata` is the variable left over from the loops above, i.e. the trace of the last
# sampler listed in best_results — it is not chosen explicitly here.
lls = esamp.likelihood_extras_for_samples(idata.posterior, refit_bcm)

In [None]:
# Smallest log-posterior value among the evaluated posterior samples.
lls['logposterior'].min()

In [None]:
# Histogram of the log-posterior values.
lls['logposterior'].plot.hist()

In [None]:
# Re-run the model for the posterior samples held in `idata` (still the leftover loop
# variable from the plotting cells above).
posterior_model_runs = esamp.model_results_for_samples(idata, refit_bcm)

In [None]:
# Incidence output of the posterior runs.
D = posterior_model_runs.results['incidence']#[149].plot(legend="refit")
# model_runs.results['incidence'][149].plot(legend="true")

In [None]:
# Place the selections D[0]..D[3] side by side; ignore_index=True renumbers the resulting
# columns from 0.
# NOTE(review): assumes the keys 0-3 of D are the four chains — TODO confirm.
# NOTE: this rebinds D, so re-running the cell operates on the already-flattened frame.
D = pd.concat([D[0],D[1], D[2],D[3]], axis=1, join="outer", ignore_index=True)

In [None]:
# NOTE(review): this assigns D the index it already has, which looks like a no-op — confirm
# whether the cell can be removed.
D.set_index(D.index, inplace=True)

In [None]:
# Overlay one forward run of the "true" sample (column 9) and one run of the refit posterior
# (column 15000 of the flattened frame D) on the same axes.
# NOTE(review): D[15000] only exists if D has more than 15000 columns, and the two column
# numbers index different sample sets — confirm which runs are meant to be compared.
model_runs.results['incidence'][9].plot(label= "true", legend=True)
D[15000].plot(label = "fit", legend=True)

## Landscape Analysis of the posterior distribution

In [None]:
#____We generate a sample of parameter values that lets us study the landscape of our log-posterior
#____(the sampling design is selected in the create_initial_sample call further down)

#!pip install pflacco #Installing the pflacco package for Landscape Analysis
#---------Some features---------------
from pflacco.classical_ela_features import *
from pflacco.classical_ela_features import calculate_ela_distribution
from pflacco.misc_features import calculate_fitness_distance_correlation
from pflacco.local_optima_network_features import compute_local_optima_network, calculate_lon_features

#__To___create_a_initial____sample
from pflacco.sampling import create_initial_sample

In [None]:
def Loglikelihood(x):
    """Log-likelihood of the refit model at contact rate ``x``, as a Python float.

    Only the contact rate is calibrated here, so ``x`` is a single value and
    the recovery rate is held at 0.1. With several calibrated parameters, ``x``
    would instead be a vector with one component per parameter.
    """
    return float(refit_bcm.loglikelihood(contact_rate=x, recovery_rate=0.1))
def negative(f, *args):
    """Evaluate ``f(*args)`` and return its negation as a float.

    This turns a quantity to maximise into one to minimise. Note that ``f`` is
    called immediately: the return value is a number, not a new callable.

    Args:
        f: The callable to evaluate.
        *args: Positional arguments forwarded to ``f``.

    Returns:
        ``float(0.0 - f(*args))``.
    """
    value = f(*args)
    return float(0.0 - value)
def obj_func (x):
    """Objective to minimise: the negated log-likelihood at ``x``.

    ``x`` is unpacked into positional arguments, so it must be an iterable
    holding the parameter values (a single contact rate here).
    """
    neg_log_likelihood = negative(Loglikelihood, *x)
    return neg_log_likelihood

In [None]:
# Create the initial sample of n = 64 points. The call requests sample_type="sobol",
# i.e. a Sobol design rather than Latin hypercube sampling.
# NOTE(review): the first argument (1) is presumably the dimension, and no bounds are passed,
# so the points fall in create_initial_sample's default range — confirm that it matches the
# prior range [0.1, 0.5] of the refit model.
n = 2**6
X = create_initial_sample(1, n, sample_type = "sobol")

# Evaluate the objective (negated log-likelihood) once per sampled row.
Y = X.apply(lambda x: obj_func(x),axis=1)


In [None]:
# ELA distribution features of the objective values over the sample; Y is flattened to 1-D first.
ela_distr = calculate_ela_distribution(X, Y.values.reshape(-1))

In [None]:
# Display the computed features.
ela_distr