# Analyze the TIME / ESS distribution 

Randomly choose 25 test samples and, for each one, run an MCMC chain on the surrogate model to measure the sampling time per effective sample (Time / ESS).

### Import the libraries 

In [2]:
# Standard library imports
import sys

import os
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['KERAS_BACKEND'] = 'tensorflow'

# Third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import arviz as az
import timeit

import scipy.stats as stats
from keras.models import Model as Model_nn
from keras.models import Sequential, load_model
from keras.layers import Dense, Concatenate
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Input, Dense, Add

#Try with TinyDA
import tinyDA as tda
from scipy.stats import multivariate_normal
from scipy.stats import uniform
from itertools import product


# Local module imports
sys.path.append('../../')
sys.path.append('../../solver')
#sys.path.append('./src/InverseProblems')
#sys.path.append('./src/utils')
from utils import * 
from plotting import *
from random_process import *
from model import *

2024-09-22 16:02:23.655887: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Ray module not found. Multiprocessing features are not available


### Choose the 25 random samples

In [3]:
# Number of test cases to evaluate
n = 25

# Seed the global NumPy generator so the same indices are drawn on every run,
# then draw n integer indices from [0, 160). The draws are independent, so the
# same index can appear more than once.
np.random.seed(2109)
random_samples = np.random.randint(low=0, high=160, size=n)
random_samples

array([57, 32, 55, 69,  3])

### Load the data and surrogate model 

In [4]:
# Number of eigenmodes (kept as a notebook-level constant)
n_eig = 64

# Read the test inputs (X) and the matching test outputs (y) from their
# comma-separated files; both are loaded the same way, X first and then y.
X_values, y_values = (
    np.loadtxt(csv_path, delimiter=',')
    for csv_path in (
        '../../data/50-25-10/X_test_50resolution.csv',
        '../../data/50-25-10/y_test_50resolution.csv',
    )
)

In [5]:
# Resolution parameters: h1 is the finer grid, h2 the coarser one
resolution_h1 = (50, 50)
resolution_h2 = (25, 25)

# PDE parameters
field_mean = 1
field_stdev = 1
lamb_cov = 0.1
mkl = 64

# Set up the model(s)
solver_h1 = Model(resolution_h1, field_mean, field_stdev, mkl, lamb_cov)
solver_h2 = Model(resolution_h2, field_mean, field_stdev, mkl, lamb_cov)


# Adjust the transmissivity of h2 based on h1: find which h1 mesh nodes also
# belong to the h2 mesh, so that the h1 eigenvectors can be restricted to them.
list1 = solver_h1.solver.mesh.coordinates()
list2 = solver_h2.solver.mesh.coordinates()

# The structured views below reinterpret each row as one record, which requires
# a C-contiguous layout; np.ascontiguousarray guarantees it for any input.
array1 = np.ascontiguousarray(list1)
array2 = np.ascontiguousarray(list2)

# Convert to structured arrays for easy row-wise comparison
dtype = {'names': ['f{}'.format(i) for i in range(array1.shape[1])],
         'formats': [array1.dtype] * array1.shape[1]}

structured_array1 = array1.view(dtype)
structured_array2 = array2.view(dtype)

# Boolean mask over the rows of array1: True where the row also appears in
# array2. np.isin replaces the deprecated np.in1d; it keeps the (N, 1) shape of
# the structured view, so it is flattened to the 1-D mask np.in1d returned.
bool_vector2 = np.isin(structured_array1, structured_array2).ravel()

# The comparison is an exact floating-point match on the coordinates. If some
# h2 node has no exact counterpart in h1, the restricted eigenvectors would
# have the wrong number of rows, so fail here with a clear message instead.
if int(bool_vector2.sum()) != array2.shape[0]:
    raise ValueError(
        'Only {} of the {} h2 mesh nodes were matched in the h1 mesh; '
        'cannot restrict the h1 eigenvectors to the h2 mesh.'.format(
            int(bool_vector2.sum()), array2.shape[0]))

# Set the transmissivity field: h2 reuses the h1 eigenvalues and the rows of
# the h1 eigenvectors selected by the mask.
solver_h2.random_process.eigenvalues = solver_h1.random_process.eigenvalues
solver_h2.random_process.eigenvectors = solver_h1.random_process.eigenvectors[bool_vector2]

# 5 x 5 grid of (x, y) points built from the same five values on each axis
x_data = y_data = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
datapoints = np.array(list(product(x_data, y_data)))

def solver_h2_data(x):
    """Solve the h2 model for parameters ``x`` and return its data at ``datapoints``.

    Calls ``solver_h2.solve(x)`` and then returns whatever
    ``solver_h2.get_data(datapoints)`` yields.
    """
    solver_h2.solve(x)
    observations = solver_h2.get_data(datapoints)
    return observations


# Choose the model parameters
n_samples = 16000
coeff = 1e-08
n_neurons = 256

# Build the two-input network. NOTE: the network assembled here is replaced a
# few lines below by the model loaded from disk, so it does not affect the
# results of this notebook.
input_params = Input(shape=(X_values.shape[1],))
input_pod = Input(shape=(y_values.shape[1],))

# First branch (parameters): six stacked GELU layers of n_neurons units
x1 = input_params
for _depth in range(6):
    x1 = Dense(n_neurons, activation='gelu')(x1)

# Second branch (POD): a single GELU layer
x2 = Dense(n_neurons, activation='gelu')(input_pod)

# # Define the second branch (POD)
# x3 = Dense(n_neurons, activation='gelu', kernel_regularizer=l2(w))(input_nn)

# Sum the two branches, pass the sum through one more GELU layer and map it to
# the 25 linear outputs
combined = Dense(n_neurons, activation='gelu')(Add()([x1, x2]))
output = Dense(25, activation='linear')(combined)

# Create the model, then overwrite it with the model stored on disk
model_hf = Model_nn(inputs=[input_params, input_pod], outputs=output)
model_hf = load_model(f'..//models/model_1step_50-25resolution_{n_samples}samples_1.keras')


def model_nn(input):
    """Evaluate the loaded network for one parameter vector.

    The network receives ``input`` reshaped to (1, 64) together with the output
    of ``solver_h2_data(input)`` reshaped to (1, 25); its result is returned as
    a NumPy array reshaped to length 25.
    """
    coarse_output = solver_h2_data(input).reshape(1, 25)
    prediction = model_hf([input.reshape(1, 64), coarse_output])
    return prediction.numpy().reshape(25)

### Time / ESS with noise 0.001 and the likelihood covariance scaled by a multiplicative coefficient (×10)

In [8]:
noise = 0.001
scaling = 0.015 #0.05
n_iter = 55000
burnin = 5000
thin = 50

# Per-sample results, one entry per element of random_samples
Times = []
Time_ESS = []
ESS = []

# Define the prior distribution and the proposal (common to all samples)
x_distribution = stats.multivariate_normal(mean = np.zeros(64), cov = np.eye(64))
my_proposal = tda.CrankNicolson(scaling=scaling, adaptive=False, gamma = 1.01, period=100)

# i is a 1-based progress counter used only in the printed summary line
for i, sample in enumerate(random_samples, start=1):
    print('Sample = ', sample)
    x_true = X_values[sample]  # not used by the sampler below
    y_true = y_values[sample]

    # Perturb the reference output with Gaussian noise of standard deviation `noise`
    y_observed = y_true + np.random.normal(scale=noise,size=y_true.shape[0])

    # LIKELIHOOD
    # In this cell the likelihood covariance is 10 times the covariance of the
    # noise added to the observation above.
    cov_likelihood = noise**2 * np.eye(25)
    y_distribution = tda.GaussianLogLike(y_observed, cov_likelihood*10)

    # initialise the Posterior
    my_posterior = tda.Posterior(x_distribution, y_distribution, model_nn)

    # RUN THE MCMC (only the sampling call itself is timed)
    start = timeit.default_timer()
    samples = tda.sample(my_posterior, my_proposal, iterations=n_iter, n_chains=1, initial_parameters=np.zeros(64))
    end = timeit.default_timer()

    # Remove the burnin and sub-sample
    idata = tda.to_inference_data(samples, level='fine')
    idata = idata.sel(draw=slice(burnin, None, thin), groups="posterior")
    ess = az.ess(idata)

    #Compute the time
    t = end-start
    Times.append(t)

    # Compute the mean ESS on the 64 parameters (variables x0 ... x63)
    e = np.mean([ess.data_vars['x'+str(k)].values for k in range(64)])
    ESS.append(e)

    #Compute Time / ESS
    Time_ESS.append(t/e)

    print('Time:', t, '   ESS: ', e, '   Time/ESS: ',t/e , '     ', i,'/', len(random_samples))


# Save the results
folder_path = './recorded_values'  # Replace with your actual path

# Create the folder if needed, so the results of the long sampling run are not
# lost because the directory is missing.
os.makedirs(folder_path, exist_ok=True)

# Save the file in the specified folder
file_path = os.path.join(folder_path, 'MF_1step_time_ess_001.npy')
np.save(file_path, Time_ESS)
file_path = os.path.join(folder_path, 'MF_1step_Times_001.npy')
np.save(file_path, Times)
file_path = os.path.join(folder_path, 'MF_1step_ESS_001.npy')
np.save(file_path, ESS)

Sample =  57
Sampling chain 1/1


Running chain, α = 0.36: 100%|██████████| 1000/1000 [00:16<00:00, 61.71it/s]


Time: 16.236571500019636    ESS:  4.245821284127148    Time/ESS:  3.8241297533458325
Sample =  32
Sampling chain 1/1


Running chain, α = 0.43: 100%|██████████| 1000/1000 [00:18<00:00, 55.27it/s]


Time: 18.110157374991104    ESS:  3.9787764472188263    Time/ESS:  4.551690102531431
Sample =  55
Sampling chain 1/1


Running chain, α = 0.34: 100%|██████████| 1000/1000 [00:16<00:00, 62.13it/s]


Time: 16.11554962498485    ESS:  4.311329857842728    Time/ESS:  3.737953289672118
Sample =  69
Sampling chain 1/1


Running chain, α = 0.38: 100%|██████████| 1000/1000 [00:15<00:00, 64.49it/s]


Time: 15.52463887500926    ESS:  4.680923398510175    Time/ESS:  3.3165761439186072
Sample =  3
Sampling chain 1/1


Running chain, α = 0.35: 100%|██████████| 1000/1000 [00:15<00:00, 66.61it/s]


Time: 15.0324454579968    ESS:  3.975867565394955    Time/ESS:  3.780922078198927


### Time / ESS with higher noise (0.01)

In [None]:
noise = 0.01
scaling = 0.04
n_iter =  55000
burnin = 5000
thin = 50

# Per-sample results, one entry per element of random_samples
Times = []
Time_ESS = []
ESS = []

# Define the prior distribution and the proposal (common to all samples)
x_distribution = stats.multivariate_normal(mean = np.zeros(64), cov = np.eye(64))
my_proposal = tda.CrankNicolson(scaling=scaling, adaptive=False, gamma = 1.01, period=100)

# i is a 1-based progress counter used only in the printed summary line
for i, sample in enumerate(random_samples, start=1):
    print('Sample = ', sample)
    x_true = X_values[sample]  # not used by the sampler below
    y_true = y_values[sample]

    # Perturb the reference output with Gaussian noise of standard deviation `noise`
    y_observed = y_true + np.random.normal(scale=noise,size=y_true.shape[0])

    # LIKELIHOOD
    # Here the likelihood covariance equals the covariance of the added noise.
    cov_likelihood = noise**2 * np.eye(25)
    y_distribution = tda.GaussianLogLike(y_observed, cov_likelihood)

    # initialise the Posterior
    my_posterior = tda.Posterior(x_distribution, y_distribution, model_nn)

    # RUN THE MCMC (only the sampling call itself is timed)
    start = timeit.default_timer()
    samples = tda.sample(my_posterior, my_proposal, iterations=n_iter, n_chains=1, initial_parameters=np.zeros(64))
    end = timeit.default_timer()

    # Remove the burnin and sub-sample
    idata = tda.to_inference_data(samples, level='fine')
    idata = idata.sel(draw=slice(burnin, None, thin), groups="posterior")
    ess = az.ess(idata)

    #Compute the time
    t = end-start
    Times.append(t)

    # Compute the mean ESS on the 64 parameters (variables x0 ... x63)
    e = np.mean([ess.data_vars['x'+str(k)].values for k in range(64)])
    ESS.append(e)

    #Compute Time / ESS
    Time_ESS.append(t/e)

    print('Time:', t, '   ESS: ', e, '   Time/ESS: ',t/e , '     ', i,'/', len(random_samples))

# Save the results
folder_path = './recorded_values'  # Replace with your actual path

# Create the folder if needed, so the results of the long sampling run are not
# lost because the directory is missing.
os.makedirs(folder_path, exist_ok=True)

# Save the file in the specified folder
file_path = os.path.join(folder_path, 'MF_1step_time_ess_01.npy')
np.save(file_path, Time_ESS)
file_path = os.path.join(folder_path, 'MF_1step_Times_01.npy')
np.save(file_path, Times)
file_path = os.path.join(folder_path, 'MF_1step_ESS_01.npy')
np.save(file_path, ESS)

Sample =  57
Sampling chain 1/1


Running chain, α = 0.37: 100%|██████████| 1000/1000 [00:14<00:00, 67.85it/s]


Time: 14.776755625003716    ESS:  5.0205495454204145    Time/ESS:  2.943254616117194       1 / 5
Sample =  32
Sampling chain 1/1


Running chain, α = 0.31: 100%|██████████| 1000/1000 [00:15<00:00, 66.60it/s]


Time: 15.030243749992223    ESS:  4.638714055364616    Time/ESS:  3.2401746627623944       2 / 5
Sample =  55
Sampling chain 1/1


Running chain, α = 0.38: 100%|██████████| 1000/1000 [00:14<00:00, 69.79it/s]


Time: 14.346181083994452    ESS:  5.447396159949864    Time/ESS:  2.633585049214869       3 / 5
Sample =  69
Sampling chain 1/1


Running chain, α = 0.28: 100%|██████████| 1000/1000 [00:15<00:00, 62.94it/s]


Time: 15.903650249994826    ESS:  5.504153573247869    Time/ESS:  2.8893907189094765       4 / 5
Sample =  3
Sampling chain 1/1


Running chain, α = 0.42: 100%|██████████| 1000/1000 [00:17<00:00, 57.57it/s]


Time: 17.388912833004724    ESS:  5.447537120459787    Time/ESS:  3.1920687181177123       5 / 5
