# Here is the description of the functions

The computer code takes $q$ variables $(\lambda_1, \lambda_2, \dots, \lambda_q)$. $p$ of them are physical variables to calibrate and are treated with a uniform prior, and $q-p$ are numerical variables that represent the model error and are treated with a hierarchical model.
The array "index_lambda_p" indicates which of these variables are the physical variables, and "index_lambda_q" indicates which of them are the numerical variables.

- transform_Lambda takes a normalized vector of parameters $\boldsymbol{\lambda}_{norm}$, whose first $p$ entries are the physical variables and whose next $q-p$ entries are the numerical variables, and returns the vector $\boldsymbol{\lambda}$ in physical values, with each variable placed at the position given by "index_lambda_p" and "index_lambda_q".

- p_lambda_df takes a dataframe df_Lambda of vectors $(\boldsymbol{\lambda}_k)_{k=1}^M$ and a vector of hyperparameters $\boldsymbol{\alpha}$, and returns the vector $p_{\boldsymbol{\Lambda}}(\boldsymbol{\lambda}_k \mid \boldsymbol{\alpha})_{k=1}^M$.

- get_likelihoods_dflambda takes a dataframe df_Lambda of vectors $(\boldsymbol{\lambda}_k)_{k=1}^M$, and returns the simulations, the associated standard deviations if Gaussian processes are used, and the likelihoods associated with each $\boldsymbol{\lambda}_k$. The argument sigma is the standard deviation of the observation noise; myCODE is the simulator (or the GP predictor); results_measures is the dataframe of the observations; index indicates the outputs considered for the computation of the likelihood (if multiple outputs are indicated, one likelihood is computed for each output and each $\boldsymbol{\lambda}_k$); std_code indicates whether or not the standard deviation of the code should be taken into account; and idx_loo is the index of the observation that should be removed from the likelihood computation in the leave-one-out (LOO) scheme.

- sample_Lambda takes an integer $M$ and a vector of hyperparameters $\boldsymbol{\alpha}$ and returns a sample $(\boldsymbol{\lambda}_k)_{k=1}^M$ drawn i.i.d. with density $p_{\boldsymbol{\Lambda}}(.\mid \boldsymbol{\alpha})$.

- likelihood_alpha takes a vector $\boldsymbol{\alpha}$, the likelihoods computed with another vector $\boldsymbol{\alpha}^\star$, the prior densities $p_{\boldsymbol{\Lambda}}(\boldsymbol{\lambda}_k\mid \boldsymbol{\alpha}^\star)_{k=1}^{M}$ computed with $\boldsymbol{\alpha}^\star$, and the dataframe $(\boldsymbol{\lambda}_k)_{k=1}^M$ sampled with density $p_{\boldsymbol{\Lambda}}(.\mid \boldsymbol{\alpha}^\star)$. It returns the likelihood of $\boldsymbol{\alpha}$ estimated by importance sampling.

- find_map takes $\boldsymbol{\alpha}^\star$, the bounds of the search domain for $\boldsymbol{\alpha}$, the likelihoods computed with $\boldsymbol{\alpha}^\star$, and the sample $(\boldsymbol{\lambda}_k)_{k=1}^M$ drawn i.i.d. with density $p_{\boldsymbol{\Lambda}}(.\mid \boldsymbol{\alpha}^\star)$, and returns the estimated maximum a posteriori, considering a uniform prior.




In [None]:


def transform_Lambda(Lambda, index_lambda_p, index_lambda_q):
    """Reorder a normalized parameter vector and map it to physical values.

    Lambda holds the normalized values, physical variables first and
    numerical variables next. Entry ``i`` of Lambda is moved to the position
    listed at rank ``i`` of ``index_lambda_p + index_lambda_q`` (both are
    concatenated with ``+``, so they are expected to be lists). Each
    coordinate is then rescaled with the module-level bounds ``bMINlambda``
    and ``bMAXlambda``.

    Returns a NumPy array with the rescaled, reordered values.
    """
    layout = index_lambda_p + index_lambda_q
    # For every target position, fetch the normalized value assigned to it.
    reordered = []
    for position in range(len(layout)):
        reordered.append(Lambda[layout.index(position)])
    # Affine map from the normalized range to [bMINlambda, bMAXlambda].
    physical = []
    for position, value in enumerate(reordered):
        span = bMAXlambda[position] - bMINlambda[position]
        physical.append(value*span + bMINlambda[position])
    return np.array(physical)

def p_lambda_df(df_Lambda, alpha, index_lambda_p, index_lambda_q, scale = 0.45):
    """Evaluate the prior density of every row of df_Lambda given alpha.

    The rows are first normalized with the module-level bounds
    ``bMINlambda`` / ``bMAXlambda``. The density is the product of
    - an indicator that all columns ``index_lambda_p`` lie in [0, 1], and
    - for each entry ``alpha[i]``, a Gaussian of mean ``alpha[i]`` and
      standard deviation ``scale`` truncated to [0, 1], evaluated on column
      ``index_lambda_q[i]``.

    alpha must support array arithmetic (e.g. a NumPy array); the same
    scalar ``scale`` is used for every numerical variable.
    """
    widths = np.array([scale]*len(alpha))
    normalized = (df_Lambda - bMINlambda)/(bMAXlambda - bMINlambda)

    # Indicator of the unit hypercube on the physical coordinates.
    physical_part = normalized.iloc[:, index_lambda_p]
    inside_support = ((physical_part >= 0) & (physical_part <= 1)).all(axis=1)

    # Truncation bounds expressed in standardized units, as truncnorm expects.
    lower = (0 - alpha) / widths
    upper = (1 - alpha) / widths

    density = 1
    for rank, center in enumerate(alpha):
        column = normalized.iloc[:, index_lambda_q[rank]].values
        standardized = (column - center)/widths[rank]
        density = density*truncnorm.pdf(standardized, lower[rank], upper[rank])/widths[rank]
    return inside_support*density

def get_likelihoods_dflambda(df_Lambda, sigma, myCODE, results_measures, index = None, std_code = False, idx_loo = None):
    """Run the simulator on df_Lambda and compute Gaussian likelihoods.

    Parameters
    ----------
    df_Lambda : passed unchanged to ``myCODE``.
    sigma : sequence of observation-noise standard deviations; the entry used
        for output ``Yk`` is ``sigma[k-1]``.
    myCODE : callable invoked as
        ``myCODE(df_Lambda, index=..., std_bool=..., vectorize=True, idx_loo=...)``.
        It must return a sequence of DataFrames (one per parameter vector,
        one column per requested output), or a pair
        ``(simulations, standard deviations)`` when ``std_code`` is True.
    results_measures : DataFrame of observations with columns ``"Y<k>"``;
        rows are selected by label with ``.loc`` using ``0..len-1``.
    index : list of output numbers, defaults to ``[1, 2, 3]``.
    std_code : whether the predictive standard deviation of the code is
        returned by ``myCODE`` and included in the likelihood.
    idx_loo : label of the observation to leave out (``None`` keeps all).

    Returns
    -------
    ``(Ysimu, likelihoods)`` or, when ``std_code`` is True,
    ``(Ysimu, Ystd, likelihoods)``; ``likelihoods`` has one row per
    parameter vector and one column per output.
    """
    if index is None:
        index = [1, 2, 3]

    Ysimu = myCODE(df_Lambda, index = index, std_bool = std_code, vectorize = True, idx_loo = idx_loo)
    Ystd = None
    if std_code:
        Ysimu, Ystd = Ysimu

    # Kept observation labels, in their original order (a set difference
    # does not guarantee ordering).
    kept_rows = [row for row in range(len(results_measures)) if row != idx_loo]
    # The observations do not depend on the parameter vector: extract once.
    # Raw values are used so that the residuals are positional and never
    # subject to pandas index alignment.
    observed = [results_measures.loc[kept_rows, f"Y{out}"].values for out in index]

    likelihoods = []
    for k, simulation in enumerate(Ysimu):
        row = []
        for col, out in enumerate(index):
            residual = observed[col] - simulation.iloc[:, col].values
            noise_std = sigma[out - 1]
            if std_code:
                # Independent Gaussian errors: variances add, not standard
                # deviations (assumes myCODE returns standard deviations,
                # as the std_code / Ystd naming indicates).
                code_std = Ystd[k].iloc[:, col].values
                total_std = np.sqrt(noise_std**2 + code_std**2)
            else:
                total_std = noise_std
            row.append(np.prod(norm.pdf(residual, loc=0, scale=total_std)))
        likelihoods.append(row)
    likelihoods = np.array(likelihoods)

    if std_code:
        return Ysimu, Ystd, likelihoods
    return Ysimu, likelihoods

def sample_Lambda(alpha, M, index_lambda_p, index_lambda_q, scale = 0.45):
    """Draw M parameter vectors from the prior defined by alpha.

    Physical coordinates (``index_lambda_p``) are drawn uniformly on [0, 1];
    numerical coordinates (``index_lambda_q``) are drawn from a Gaussian of
    mean ``alpha[i]`` and standard deviation ``scale`` truncated to [0, 1].
    Each normalized draw is passed to ``transform_Lambda`` for reordering and
    rescaling. The draws are returned as a DataFrame with one row per sample.
    """
    has_numerical = len(index_lambda_q) > 0
    n_physical = len(index_lambda_p)
    if has_numerical:
        widths = np.array([scale]*len(alpha))
        # Truncation bounds in standardized units, as truncnorm expects.
        lower, upper = (0 - alpha) / widths, (1 - alpha) / widths

    draws = []
    for _ in range(M):
        if has_numerical:
            # Truncated Gaussian draw for each numerical variable.
            numerical = np.array([
                truncnorm.rvs(lower[j], upper[j], size=1)[0]*widths[j] + alpha[j]
                for j in range(len(alpha))
            ])
            uniform_part = np.random.uniform(0, 1, n_physical)
            normalized = np.concatenate([uniform_part, numerical])
        else:
            # No numerical variable: purely uniform draw.
            normalized = np.random.uniform(0, 1, n_physical)
        draws.append(
            transform_Lambda(normalized, index_lambda_p = index_lambda_p, index_lambda_q = index_lambda_q)
        )
    return pd.DataFrame(np.array(draws))

    
def likelihood_alpha(alpha, likelihoods_alpha_star, denom_is, df_Lambda, index_lambda_p, index_lambda_q, scale):
    """Estimate the likelihood of alpha by importance sampling.

    ``denom_is`` holds the prior densities of the rows of df_Lambda under the
    sampling hyperparameters; the densities under ``alpha`` are computed here
    with ``p_lambda_df``. Their ratio weights ``likelihoods_alpha_star`` (one
    weight per row, broadcast across columns), and the mean of all weighted
    entries is returned.
    """
    numerator = p_lambda_df(
        df_Lambda = df_Lambda,
        alpha = alpha,
        index_lambda_p = index_lambda_p,
        index_lambda_q = index_lambda_q,
        scale = scale,
    )
    weights = np.array(numerator/denom_is)
    # Column vector so that each weight multiplies a whole row of likelihoods.
    weights = weights.reshape(len(weights), 1)
    weighted = likelihoods_alpha_star*weights
    return np.mean(weighted)


def find_map(alpha_star, bounds, likelihoods_alpha_star, df_Lambda, index_lambda_p, index_lambda_q, scale):
    """Maximize the importance-sampling likelihood estimate over alpha.

    The densities of df_Lambda under ``alpha_star`` serve as the importance
    sampling denominator. The estimate from ``likelihood_alpha`` is divided
    by its value at ``alpha_star`` (to keep the objective away from very
    small magnitudes), negated, and minimized with L-BFGS-B inside
    ``bounds``, starting from ``alpha_star``. Returns the optimizer's ``x``.
    """
    shared = dict(
        df_Lambda = df_Lambda,
        index_lambda_p = index_lambda_p,
        index_lambda_q = index_lambda_q,
        scale = scale,
    )
    # Denominator of the importance sampling ratio.
    denom_is = p_lambda_df(alpha = alpha_star, **shared)

    def estimate(alpha):
        return likelihood_alpha(
            alpha = alpha,
            likelihoods_alpha_star = likelihoods_alpha_star,
            denom_is = denom_is,
            **shared,
        )

    baseline = estimate(alpha_star)

    def objective(alpha):
        # Negated so that minimizing maximizes the normalized estimate.
        return -estimate(alpha)/baseline

    result = minimize(objective, alpha_star, method='L-BFGS-B', bounds=bounds)
    return result.x

