# Analyze the TIME / ESS distribution 

Randomly choose 25 test samples and, for each one, run the MCMC sampler and measure its wall-clock time per effective sample (Time / ESS).

### Import the libraries 

In [7]:
# Standard library imports
import sys

import os
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['KERAS_BACKEND'] = 'tensorflow'

# Third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import arviz as az
import timeit

import scipy.stats as stats
from keras.models import Model as Model_nn
from keras.models import Sequential, load_model
from keras.layers import Dense, Concatenate
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Input, Dense, Add

#Try with TinyDA
import tinyDA as tda
from scipy.stats import multivariate_normal
from scipy.stats import uniform
from itertools import product


# Local module imports
sys.path.append('../../')
sys.path.append('../../solver')
#sys.path.append('./src/InverseProblems')
#sys.path.append('./src/utils')
from utils import * 
from plotting import *
from random_process import *
from model import *

### Choose the 25 random samples

In [8]:
# Seed NumPy's global RNG so the same indices are drawn on every fresh run.
np.random.seed(2109)

# Number of test cases to analyse.
n = 25

# Draw n integer indices from [0, 160). randint draws each index independently,
# so the same index may appear more than once.
# NOTE(review): 160 is presumably the number of rows in the test set loaded
# further down -- confirm.
random_samples = np.random.randint(low=0, high=160, size=n)
random_samples

array([57, 32, 55, 69,  3])

### Load the data and surrogate model 

In [9]:
# NOTE(review): n_eig is not referenced again in the visible cells; it
# presumably records the number of input parameters (64 is the size the
# surrogate wrappers reshape their input to) -- confirm.
n_eig = 64

# Read the test inputs (X) and the matching test outputs (y) from
# comma-separated text files, in that order.
X_values, y_values = (
    np.loadtxt(csv_path, delimiter=',')
    for csv_path in (
        '../../data/50-25-10/X_test_50resolution.csv',
        '../../data/50-25-10/y_test_50resolution.csv',
    )
)

#### Load Low fidelity

In [10]:
# Settings identifying the low-fidelity surrogate.
# n_samples_lf is substituted into the checkpoint file name when the model is loaded.
n_samples_lf = 16_000
# NOTE(review): coeff_lf is not referenced in the visible cells; presumably a
# training-time coefficient kept here for the record -- confirm.
coeff_lf = 1e-8

In [11]:
# Reference architecture of the low-fidelity surrogate: six hidden Dense layers
# of 256 GELU units followed by a linear Dense layer with 25 outputs.
# The input size is declared with an explicit Input layer instead of passing
# `input_shape=` to the first Dense layer, which Keras warns against for
# Sequential models.
model_l = Sequential([
    Input(shape=(X_values.shape[1],)),
    Dense(256, activation='gelu'),
    Dense(256, activation='gelu'),
    Dense(256, activation='gelu'),
    Dense(256, activation='gelu'),
    Dense(256, activation='gelu'),
    Dense(256, activation='gelu'),
    Dense(25, activation='linear')
])

# load_model returns the model stored in the file and `model_l` is rebound to
# it, so the network built above only documents the expected layout; the
# object used from here on is the loaded one.
model_l = load_model(f'../models/model_50resolution_{n_samples_lf}samples_1.keras')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


#### Load High Fidelity

In [12]:
# Resolution parameters of the two PDE solvers (h1: 50x50, h2: 25x25).
resolution_h1 = (50, 50)
resolution_h2 = (25, 25)

# PDE / random-field parameters forwarded to the Model constructor.
# NOTE(review): the meaning of each value is inferred from its name
# (field mean, field standard deviation, covariance length, number of
# modes) -- confirm against the Model class.
field_mean = 1
field_stdev = 1
lamb_cov = 0.1
mkl = 64

# Set up the model(s)
solver_h1 = Model(resolution_h1, field_mean, field_stdev, mkl, lamb_cov)
solver_h2 = Model(resolution_h2, field_mean, field_stdev, mkl, lamb_cov)


# Adjust the transmissivity of h2 based on h1: find which h1 mesh nodes are
# also nodes of the h2 mesh.
list1 = solver_h1.solver.mesh.coordinates()
list2 = solver_h2.solver.mesh.coordinates()

# Convert lists to numpy arrays if they are not already
array1 = np.array(list1)
array2 = np.array(list2)

# View every coordinate row as one structured element so that rows can be
# compared as a whole (one field per coordinate column).
dtype = {'names': ['f{}'.format(i) for i in range(array1.shape[1])],
         'formats': [array1.dtype] * array1.shape[1]}

structured_array1 = array1.view(dtype)
structured_array2 = array2.view(dtype)

# Boolean mask over the h1 nodes: True where the (exact) coordinate row also
# occurs in the h2 mesh. np.isin replaces np.in1d, which is deprecated in
# NumPy 2.0; the structured views have shape (N, 1), so the result is
# flattened to obtain the same 1-D mask np.in1d returned.
bool_vector2 = np.isin(structured_array1, structured_array2).ravel()

# Set the transmissivity field: h2 reuses the eigenvalues of h1 and the rows
# of the h1 eigenvectors selected by the mask.
# NOTE(review): this assumes the selected h1 rows appear in the same order as
# the h2 mesh nodes -- confirm.
solver_h2.random_process.eigenvalues = solver_h1.random_process.eigenvalues
solver_h2.random_process.eigenvectors = solver_h1.random_process.eigenvectors[bool_vector2]

# 5 x 5 grid of points (25 in total) passed to get_data when evaluating a solver.
x_data = y_data = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
datapoints = np.array(list(product(x_data, y_data)))

def solver_h2_data(x):
    """Evaluate the coarse (h2) solver for the parameters ``x``.

    Calls ``solver_h2.solve(x)`` and then returns what
    ``solver_h2.get_data(datapoints)`` yields, i.e. the solver's values at
    the module-level ``datapoints`` grid.

    Parameters
    ----------
    x : parameter vector handed unchanged to ``solver_h2.solve``.

    Returns
    -------
    The object returned by ``solver_h2.get_data``; the high-fidelity
    surrogate wrapper reshapes it to ``(1, 25)``.
    """
    solver_h2.solve(x)
    coarse_observations = solver_h2.get_data(datapoints)
    return coarse_observations




# Settings identifying the high-fidelity surrogate.
# n_samples is substituted into the checkpoint file name below.
n_samples = 16000
# NOTE(review): coeff is not referenced in the visible cells -- confirm whether
# it is still needed.
coeff = 1e-08

# Width of every hidden layer.
n_neurons = 256

# The network has two inputs: the parameter vector and a second vector with as
# many entries as a row of y_values (the wrapper model_hf feeds the coarse
# solver output into it).
input_params = Input(shape=(X_values.shape[1],))
input_pod = Input(shape=(y_values.shape[1],))

# First branch (parameters): six stacked Dense/GELU layers.
x1 = input_params
for _hidden_idx in range(6):
    x1 = Dense(n_neurons, activation='gelu')(x1)

# Second branch: a single Dense/GELU layer on the second input.
x2 = Dense(n_neurons, activation='gelu')(input_pod)

# Merge the two branches by element-wise addition, then one more hidden layer
# and a linear output layer with 25 units.
combined = Add()([x1, x2])
combined = Dense(n_neurons, activation='gelu')(combined)
output = Dense(25, activation='linear')(combined)

# Assemble the reference model, then rebind model_h to the model stored on
# disk; the loaded object is the one used from here on.
model_h = Model_nn(inputs=[input_params, input_pod], outputs=output)
model_h = load_model(f'..//models/model_1step_50-25resolution_{n_samples}samples_1.keras')

In [13]:
def model_lf(input):
    """Low-fidelity forward map.

    Reshapes ``input`` to a single-row batch of 64 values, evaluates the
    loaded surrogate ``model_l`` on it and returns the result as a flat
    NumPy array of length 25.
    """
    params_batch = input.reshape(1, 64)
    prediction = model_l(params_batch)
    return prediction.numpy().reshape(25)


def model_hf(input):
    """High-fidelity forward map.

    Feeds the surrogate ``model_h`` with two single-row batches: the 64
    parameters and the 25 values returned by ``solver_h2_data(input)``.
    Returns the prediction as a flat NumPy array of length 25.
    """
    params_batch = input.reshape(1, 64)
    coarse_batch = solver_h2_data(input).reshape(1, 25)
    prediction = model_h([params_batch, coarse_batch])
    return prediction.numpy().reshape(25)

### Time / ESS with noise 0.001 and a multiplicative coefficient on the likelihood covariance

In [14]:
# Experiment settings (low-noise case).
noise = 0.001        # standard deviation of the Gaussian noise added to the data
scaling = 0.015      # scaling of the Crank-Nicolson proposal (0.04 was tried before)
n_iter = 33000       # MCMC iterations per sample (55000 was tried before)
burnin = 3000        # leading draws discarded before the ESS is computed (5000 was tried before)
thin = 20            # keep every 20th draw after the burn-in
sub_sampling = 1     # forwarded to tda.sample as subsampling_rate

# Output folder for the recorded values. It is created up front (if missing)
# so that a long sampling run cannot be lost to a failing np.save at the end.
folder_path = './recorded_values'  # Replace with your actual path
os.makedirs(folder_path, exist_ok=True)

# Per-sample results, one entry per element of random_samples.
Times = []
Time_ESS = []
ESS = []

# Define the prior distribution and the proposal (common to all samples)
x_distribution = stats.multivariate_normal(mean=np.zeros(64), cov=np.eye(64))
my_proposal = tda.CrankNicolson(scaling=scaling, adaptive=False, gamma=1.01, period=100)

# i is a 1-based progress counter printed after every sample.
for i, sample in enumerate(random_samples, start=1):
    print('Sample = ', sample)
    # x_true is not used by the sampler; it is kept for inspection.
    x_true = X_values[sample]
    y_true = y_values[sample]

    # Synthetic observation: the test output plus Gaussian noise.
    y_observed = y_true + np.random.normal(scale=noise, size=y_true.shape[0])

    # Likelihood: diagonal covariance noise**2, multiplied by 10 on both levels
    # (the multiplicative coefficient of this experiment).
    cov_likelihood = noise**2 * np.eye(25)
    y_distribution_coarse = tda.AdaptiveGaussianLogLike(y_observed, cov_likelihood*10)
    y_distribution_fine = tda.GaussianLogLike(y_observed, cov_likelihood*10)

    # Initialise the posteriors, ordered from coarse (low-fidelity surrogate)
    # to fine (high-fidelity surrogate).
    my_posterior_coarse = tda.Posterior(x_distribution, y_distribution_coarse, model_lf)
    my_posterior_fine = tda.Posterior(x_distribution, y_distribution_fine, model_hf)
    my_posteriors = [my_posterior_coarse, my_posterior_fine]

    # Run the MCMC; only the call to tda.sample is timed.
    start = timeit.default_timer()
    samples = tda.sample(my_posteriors, my_proposal, iterations=n_iter, n_chains=1, initial_parameters=np.zeros(64), subsampling_rate=sub_sampling, adaptive_error_model='state-independent')
    end = timeit.default_timer()

    # Remove the burn-in and thin the fine-level chain, then compute the ESS.
    idata = tda.to_inference_data(samples, level='fine')
    idata = idata.sel(draw=slice(burnin, None, thin), groups="posterior")
    ess = az.ess(idata)

    # Wall-clock sampling time.
    t = end - start
    Times.append(t)

    # Mean ESS over the 64 parameters x0 ... x63. The index is named k so it
    # cannot be confused with the progress counter i.
    e = np.mean([ess.data_vars['x' + str(k)].values for k in range(64)])
    ESS.append(e)

    # Time per effective sample.
    Time_ESS.append(t/e)

    print('Time:', t, '   ESS: ', e, '   Time/ESS: ', t/e, '     ', i, '/', len(random_samples))

# Save the results in the output folder created above.
file_path = os.path.join(folder_path, 'MDA_MF_1step_time_ess_001.npy')
np.save(file_path, Time_ESS)
file_path = os.path.join(folder_path, 'MDA_MF_1step_Times_001.npy')
np.save(file_path, Times)
file_path = os.path.join(folder_path, 'MDA_MF_1step_ESS_001.npy')
np.save(file_path, ESS)

Sample =  57
Sampling chain 1/1


  return np.exp(proposal_link.likelihood - previous_link.likelihood)
Running chain, α_c = 0.430, α_f = 0.34: 100%|██████████| 1000/1000 [00:11<00:00, 84.38it/s]


Time: 11.968942583014723    ESS:  4.122430318234322    Time/ESS:  2.9033705021219474
Sample =  32
Sampling chain 1/1


Running chain, α_c = 0.360, α_f = 0.32: 100%|██████████| 1000/1000 [00:11<00:00, 86.75it/s]


Time: 11.54722033298458    ESS:  3.802613454946228    Time/ESS:  3.0366537303350145
Sample =  55
Sampling chain 1/1


Running chain, α_c = 0.280, α_f = 0.16: 100%|██████████| 1000/1000 [00:11<00:00, 84.70it/s]


Time: 11.826589790987782    ESS:  3.285221425006511    Time/ESS:  3.5999368873482687
Sample =  69
Sampling chain 1/1


Running chain, α_c = 0.250, α_f = 0.27: 100%|██████████| 1000/1000 [00:11<00:00, 84.71it/s]


Time: 11.825682333990699    ESS:  4.046235431941922    Time/ESS:  2.922638223331252
Sample =  3
Sampling chain 1/1


Running chain, α_c = 0.300, α_f = 0.27: 100%|██████████| 1000/1000 [00:11<00:00, 85.86it/s]


Time: 11.66707204200793    ESS:  4.08602384232819    Time/ESS:  2.855360735085703


### Time / ESS with higher noise (0.01)

In [15]:
# Experiment settings (higher-noise case).
noise = 0.01         # standard deviation of the Gaussian noise added to the data
scaling = 0.04       # scaling of the Crank-Nicolson proposal
n_iter = 33000       # MCMC iterations per sample (55000 was tried before)
burnin = 3000        # leading draws discarded before the ESS is computed (5000 was tried before)
thin = 20            # keep every 20th draw after the burn-in
sub_sampling = 1     # forwarded to tda.sample as subsampling_rate

# Output folder for the recorded values. It is created up front (if missing)
# so that a long sampling run cannot be lost to a failing np.save at the end.
folder_path = './recorded_values'  # Replace with your actual path
os.makedirs(folder_path, exist_ok=True)

# Per-sample results, one entry per element of random_samples.
Times = []
Time_ESS = []
ESS = []

# Define the prior distribution and the proposal (common to all samples)
x_distribution = stats.multivariate_normal(mean=np.zeros(64), cov=np.eye(64))
my_proposal = tda.CrankNicolson(scaling=scaling, adaptive=False, gamma=1.01, period=100)

# i is a 1-based progress counter printed after every sample.
for i, sample in enumerate(random_samples, start=1):
    print('Sample = ', sample)
    # x_true is not used by the sampler; it is kept for inspection.
    x_true = X_values[sample]
    y_true = y_values[sample]

    # Synthetic observation: the test output plus Gaussian noise.
    y_observed = y_true + np.random.normal(scale=noise, size=y_true.shape[0])

    # Likelihood: diagonal covariance noise**2 on both levels (no extra factor here).
    cov_likelihood = noise**2 * np.eye(25)
    y_distribution_coarse = tda.AdaptiveGaussianLogLike(y_observed, cov_likelihood)
    y_distribution_fine = tda.GaussianLogLike(y_observed, cov_likelihood)

    # Initialise the posteriors, ordered from coarse (low-fidelity surrogate)
    # to fine (high-fidelity surrogate).
    my_posterior_coarse = tda.Posterior(x_distribution, y_distribution_coarse, model_lf)
    my_posterior_fine = tda.Posterior(x_distribution, y_distribution_fine, model_hf)
    my_posteriors = [my_posterior_coarse, my_posterior_fine]

    # Run the MCMC; only the call to tda.sample is timed.
    start = timeit.default_timer()
    samples = tda.sample(my_posteriors, my_proposal, iterations=n_iter, n_chains=1, initial_parameters=np.zeros(64), subsampling_rate=sub_sampling, adaptive_error_model='state-independent')
    end = timeit.default_timer()

    # Remove the burn-in and thin the fine-level chain, then compute the ESS.
    idata = tda.to_inference_data(samples, level='fine')
    idata = idata.sel(draw=slice(burnin, None, thin), groups="posterior")
    ess = az.ess(idata)

    # Wall-clock sampling time.
    t = end - start
    Times.append(t)

    # Mean ESS over the 64 parameters x0 ... x63. The index is named k so it
    # cannot be confused with the progress counter i.
    e = np.mean([ess.data_vars['x' + str(k)].values for k in range(64)])
    ESS.append(e)

    # Time per effective sample.
    Time_ESS.append(t/e)

    print('Time:', t, '   ESS: ', e, '   Time/ESS: ',t/e , '     ', i,'/', len(random_samples))

# Save the results in the output folder created above.
file_path = os.path.join(folder_path, 'MDA_MF_1step_time_ess_01.npy')
np.save(file_path, Time_ESS)
file_path = os.path.join(folder_path, 'MDA_MF_1step_Times_01.npy')
np.save(file_path, Times)
file_path = os.path.join(folder_path, 'MDA_MF_1step_ESS_01.npy')
np.save(file_path, ESS)

Sample =  57
Sampling chain 1/1


Running chain, α_c = 0.270, α_f = 0.26: 100%|██████████| 1000/1000 [00:11<00:00, 86.17it/s]


Time: 11.646104250015924    ESS:  3.2757094966013027    Time/ESS:  3.555292147273525       1 / 5
Sample =  32
Sampling chain 1/1


Running chain, α_c = 0.430, α_f = 0.34: 100%|██████████| 1000/1000 [00:11<00:00, 87.44it/s]


Time: 11.464480708003975    ESS:  3.7996420593918088    Time/ESS:  3.0172528171874804       2 / 5
Sample =  55
Sampling chain 1/1


Running chain, α_c = 0.370, α_f = 0.36: 100%|██████████| 1000/1000 [00:11<00:00, 89.50it/s]


Time: 11.194468625006266    ESS:  4.506571962232604    Time/ESS:  2.4840319246695013       3 / 5
Sample =  69
Sampling chain 1/1


Running chain, α_c = 0.280, α_f = 0.32: 100%|██████████| 1000/1000 [00:11<00:00, 90.14it/s]


Time: 11.115925582998898    ESS:  4.284278741522855    Time/ESS:  2.594585052383338       4 / 5
Sample =  3
Sampling chain 1/1


Running chain, α_c = 0.290, α_f = 0.30: 100%|██████████| 1000/1000 [00:11<00:00, 87.84it/s]


Time: 11.413445749989478    ESS:  4.533738802092797    Time/ESS:  2.517446692059316       5 / 5
