# This notebook runs all the different strategies 

Several designs $\mathbb{X} = (\mathbf{x}_j)_{j=1}^n$ are investigated to ensure robustness. Each one is associated with a "seed_x", which is the random seed used to generate the design. For every design, a csv file called "results_measures_seed_x{seed_x}.csv" is stored, with the first 10 rows corresponding to the true values of the outputs and the other 10 to the noisy observations, obtained with sigma = 0.9 for the first two output variables and 0.3 for the last one.

In [12]:
import os
import pickle

import numpy                as np
import pandas as pd
from scipy.stats import qmc

from investigate_alphamap import func_alphamap,check_repeat
from bayes_alpha import *
from bayes_lambda import *
from full_bayes import *
from embedded_discrepancy import *

from mm_taylor import *

In [None]:
# Root directory under which every strategy below stores its results.
# exist_ok=True keeps the cell idempotent (safe under Restart & Run All) and
# avoids the check-then-create race of a separate isdir() test.
os.makedirs("taylor_impact", exist_ok=True)

In [13]:
# Standard deviation of the observation noise, one entry per output variable.
sigma = [0.9, 0.9, 0.3]

# Box constraints on lambda: lower and upper bound for each component.
bMINlambda = np.array([330, 70, 8000, 3000, 0.1, 0.1])
bMAXlambda = np.array([470, 150, 10000, 5000, 5, 5.0])

# The CSV stacks the true values (first 10 rows) on top of the noisy
# observations (all remaining rows); split it into the two tables.
results_measures = pd.read_csv("measures_and_true_values.csv", index_col=0)
true_values, results_measures = results_measures.iloc[:10], results_measures.iloc[10:]

# GP hyperparameters.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("mm_list.pkl", "rb") as f:
    mm_list = pickle.load(f)

# Indices of the calibration problems to run. [1, 2, 3] tests all three
# configurations: observations of the first, second, or third output variable.
calib_only = [1, 2, 3]

# Forwarded as `loo` to every strategy below.
loo = False

# Hierarchical model

In [15]:
# Settings of the hierarchical-model strategy.
pre_path = "taylor_impact/hierarchical_model"  # output directory for this strategy

# Model error is considered: the two numerical parameters get a hierarchical
# Bayesian model whose truncated Gaussian prior has standard deviation `scale`.
model_error = True
scale = 0.45

index_lambda_q = [0, 1]        # the two numerical parameters
index_lambda_p = [2, 3, 4, 5]  # the four physical parameters


## First we estimate alpha_map

In [17]:
# Settings of the alpha_MAP search.
M = 10            # initial number of i.i.d. realizations
iter_lim = 10     # maximum number of iterations
threshold = 0.02  # forwarded to func_alphamap; presumably a stopping tolerance (TODO confirm)

alpha_min = -10   # lower bound for alpha
alpha_max = 10    # upper bound for alpha
# Maximum half-width of the interval investigated, i.e.
# [max(alpha_min, alpha_star - delta_alpha), min(alpha_max, alpha_star + delta_alpha)].
delta_alpha = 4

# NOTE(review): `myCODE` and `std_code` are not defined in this notebook;
# presumably they come from one of the star imports. Confirm on a fresh kernel.
# A plain loop is used because func_alphamap is called for its side effects
# only; a list comprehension would just echo [None, None, None].
for index_calib in calib_only:
    func_alphamap(
        index_calib=index_calib,
        M=M,
        iter_lim=iter_lim,
        threshold=threshold,
        alpha_min=alpha_min,
        alpha_max=alpha_max,
        delta_alpha=delta_alpha,
        scale=scale,
        results_measures=results_measures,
        sigma=sigma,
        myCODE=myCODE,
        mm_list=mm_list,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        pre_path=pre_path,
        loo=loo,
        std_code=std_code,
    )

IDX LOO  0
IDX LOO  1
IDX LOO  2
IDX LOO  3
IDX LOO  4
IDX LOO  5
IDX LOO  6
IDX LOO  7
IDX LOO  8
IDX LOO  9
IDX LOO  0
IDX LOO  1
IDX LOO  2
IDX LOO  3
IDX LOO  4
IDX LOO  5
IDX LOO  6
IDX LOO  7
IDX LOO  8
IDX LOO  9
IDX LOO  0
IDX LOO  1
IDX LOO  2
IDX LOO  3
IDX LOO  4
IDX LOO  5
IDX LOO  6
IDX LOO  7
IDX LOO  8
IDX LOO  9


[None, None, None]

## Then we generate posterior samples $(A_i)_{i=1}^N \propto p(yobs \mid \alpha)p_A(\alpha)$ 

In [19]:
# MCMC settings for the posterior sampling of alpha.
# tune_size / size are forwarded as-is; presumably tuning and kept iterations (TODO confirm).
tune_size = 10
size = 25
rngseed = 10
num_chain = 3
M = 10

alpha_min = -10  # lower bound for alpha
alpha_max = 10   # upper bound for alpha
# Maximum half-width of the interval investigated, i.e.
# [max(alpha_min, alpha_star - delta_alpha), min(alpha_max, alpha_star + delta_alpha)].
delta_alpha = 4

# One result per calibration problem, in the order of calib_only.
# NOTE(review): `myCODE` is not defined in this notebook; presumably it comes
# from one of the star imports. Confirm on a fresh kernel.
samples_alpha = [
    MCMC_alpha_multichains(
        index_calib=index_calib,
        scale=scale,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        M=M,
        alpha_min=alpha_min,
        alpha_max=alpha_max,
        delta_alpha=delta_alpha,
        rngseed=rngseed,
        results_measures=results_measures,
        sigma=sigma,
        myCODE=myCODE,
        mm_list=mm_list,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        pre_path=pre_path,
        loo=loo,
        std_code=False,
    )
    for index_calib in calib_only
]

0
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec1
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec2
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec3
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec4
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec5
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec6
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec7
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec8
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec9
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec0
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec1
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec2
 [-----------------100%-----------------] 35 of 35 complete in 0.1 sec3
 [-----------------100%-----------------] 35 of 35 complete in

## Then we generate posterior samples $(\Lambda_k)_{k=1}^M \propto p(yobs \mid \lambda)p_\Lambda(\lambda\mid \alpha_{MAP})$ 

In [21]:
# MCMC settings for the posterior sampling of lambda. These values are also
# reused by the "Uniform prior" and "No model error" sections further down.
tune_size = 35
size = 10
num_chain = 3
rngseed = 1234

# NOTE(review): `myCODE` and `nb_outputs` are not defined in this notebook;
# presumably they come from one of the star imports. Confirm on a fresh kernel.
# MCMC_lambda_multichains generates the posterior samples. It is called for its
# side effects only, hence a plain loop rather than a list comprehension.
for index_calib in calib_only:
    MCMC_lambda_multichains(
        index_calib=index_calib,
        model_error=model_error,
        scale=scale,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        myCODE=myCODE,
        mm_list=mm_list,
        results_measures=results_measures,
        sigma=sigma,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        rngseed=rngseed,
        pre_path=pre_path,
        nb_outputs=nb_outputs,
        loo=loo,
        std_code=False,
    )

# bayes_lambda_results computes the performance metrics associated with this
# sample (with plug-in alpha). All sampling runs finish before any metric is
# computed, as in the original cell.
for index_calib in calib_only:
    bayes_lambda_results(index_calib=index_calib, pre_path=pre_path, true_values=true_values)

 [-----------------100%-----------------] 45 of 45 complete in 0.1 sec

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


[None, None, None]

Finally, we compute the estimator:
$$E_{N,M}(h(\Lambda)) = \frac{1}{N}\sum_{i=1}^N \frac{\sum_{k=1}^M h(\Lambda_k) \frac{p_{\boldsymbol{\Lambda}}(\boldsymbol{\Lambda}_k \mid \boldsymbol{\boldsymbol{A_i}})}{p_{\boldsymbol{\Lambda}}(\boldsymbol{\Lambda}_k \mid \boldsymbol{\boldsymbol{\alpha}^\star})}}{\sum_{k=1}^M  \frac{p_{\boldsymbol{\Lambda}}(\boldsymbol{\Lambda}_k \mid \boldsymbol{\boldsymbol{A_i}})}{p_{\boldsymbol{\Lambda}}(\boldsymbol{\Lambda}_k \mid \boldsymbol{\boldsymbol{\alpha}^\star})}}$$ 

In [23]:
# Compute the full-Bayes estimator for each calibration problem.
# NOTE(review): `std_code` is not defined in this notebook; presumably it comes
# from one of the star imports. Confirm on a fresh kernel.
# full_bayes_results is called for its side effects only, hence a plain loop
# rather than a list comprehension that would echo [None, None, None].
for index_calib in calib_only:
    full_bayes_results(
        index_calib=index_calib,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        scale=scale,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        pre_path=pre_path,
        true_values=true_values,
        loo=loo,
        std_code=std_code,
    )

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


[None, None, None]

# Uniform prior

In [25]:
# Settings of the uniform-prior strategy.
pre_path = "taylor_impact/uniform_error"  # output directory for this strategy

model_error = True
scale = None  # no truncated Gaussian prior in this strategy

index_lambda_q = []                  # no hierarchical model
index_lambda_p = [2, 3, 4, 5, 0, 1]  # every variable is given a uniform prior

## We generate posterior samples $(\Lambda_k)_{k=1}^M \propto p(yobs \mid \lambda)p_\Lambda(\lambda)$ 

In [27]:
# MCMC_lambda_multichains generates the posterior samples under the uniform
# prior. tune_size, size, num_chain and rngseed keep the values set in the
# hierarchical-model section above.
# NOTE(review): `myCODE`, `nb_outputs` and `std_code` are not defined in this
# notebook; presumably they come from one of the star imports. Confirm.
# The function is called for its side effects only, hence a plain loop.
for index_calib in calib_only:
    MCMC_lambda_multichains(
        index_calib=index_calib,
        model_error=model_error,
        scale=scale,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        myCODE=myCODE,
        mm_list=mm_list,
        results_measures=results_measures,
        sigma=sigma,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        rngseed=rngseed,
        pre_path=pre_path,
        nb_outputs=nb_outputs,
        loo=loo,
        std_code=std_code,
    )

# bayes_lambda_results computes the performance metrics associated with this
# sample. All sampling runs finish before any metric is computed.
for index_calib in calib_only:
    bayes_lambda_results(index_calib=index_calib, pre_path=pre_path, true_values=true_values)

 [-----------------100%-----------------] 45 of 45 complete in 0.1 sec

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


[None, None, None]

# No model error

In [29]:
# Settings of the no-model-error strategy.
pre_path = "taylor_impact/no_error"  # output directory for this strategy

# No model error is considered, so the two numerical parameters are fixed.
model_error = False
scale = None

index_lambda_q = [0, 1]        # the two numerical parameters
index_lambda_p = [2, 3, 4, 5]  # the four physical parameters

## We generate posterior samples $(\Lambda_k)_{k=1}^M \propto p(yobs \mid \lambda)p_\Lambda(\lambda)$ 

In [31]:
# MCMC_lambda_multichains generates the posterior samples without model error.
# tune_size, size, num_chain and rngseed keep the values set in the
# hierarchical-model section above.
# NOTE(review): `myCODE`, `nb_outputs` and `std_code` are not defined in this
# notebook; presumably they come from one of the star imports. Confirm.
# The function is called for its side effects only, hence a plain loop.
for index_calib in calib_only:
    MCMC_lambda_multichains(
        index_calib=index_calib,
        model_error=model_error,
        scale=scale,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        myCODE=myCODE,
        mm_list=mm_list,
        results_measures=results_measures,
        sigma=sigma,
        index_lambda_p=index_lambda_p,
        index_lambda_q=index_lambda_q,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        rngseed=rngseed,
        pre_path=pre_path,
        nb_outputs=nb_outputs,
        loo=loo,
        std_code=std_code,
    )

# bayes_lambda_results computes the performance metrics associated with this
# sample. All sampling runs finish before any metric is computed.
for index_calib in calib_only:
    bayes_lambda_results(index_calib=index_calib, pre_path=pre_path, true_values=true_values)

 [-----------------100%-----------------] 45 of 45 complete in 0.2 sec

  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)


[None, None, None]

# Embedded discrepancy

## We generate posterior samples $(\tilde{\Lambda}_k)_{k=1}^M \propto p(yobs \mid \tilde{\lambda})p_\Lambda(\tilde{\lambda})$, with $\tilde{\lambda} = (\lambda^1, \lambda^2)$

In [34]:
# Output directory for this strategy. The previous value was built from
# `seed_x` and `suff`, neither of which is defined anywhere in this notebook
# (NameError on a fresh kernel); it now follows the "taylor_impact/<strategy>"
# layout used by every other section.
pre_path = "taylor_impact/embedded_discrepancy"

index_lambda_p = [0, 1, 2, 3, 4, 5]
index_lambda_q = []

R = 50  # number of samples for ksi

np.random.seed(10)
# The sample for ksi is obtained with LHS, as ksi is uniform.
lhs = qmc.LatinHypercube(
    d=len(index_lambda_p),
    scramble=False,
    optimization="random-cd",
    seed=123,
)
u = lhs.random(n=R) * 2 - 1  # rescale the unit-hypercube sample to [-1, 1]

# MCMC settings for this strategy (they override the values used above).
tune_size = 30
size = 20
rngseed = 432
num_chain = 1

# Both functions are called for their side effects only, hence plain loops
# rather than list comprehensions. All sampling runs finish before any
# result is computed, as in the original cell.
for index_calib in calib_only:
    MCMC_multichains(
        index_calib=index_calib,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        u=u,
        mm_list=mm_list,
        results_measures=results_measures,
        sigma=sigma,
        index_lambda_p=index_lambda_p,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        rngseed=rngseed,
        pre_path=pre_path,
        loo=loo,
    )

for index_calib in calib_only:
    results_embed(
        index_calib=index_calib,
        num_chain=num_chain,
        tune_size=tune_size,
        size=size,
        u=u,
        mm_list=mm_list,
        results_measures=results_measures,
        sigma=sigma,
        index_lambda_p=index_lambda_p,
        bMINlambda=bMINlambda,
        bMAXlambda=bMAXlambda,
        pre_path=pre_path,
        true_values=true_values,
        rngseed=rngseed,
        loo=loo,
    )

[None, None, None]