In [1]:
%%time

import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
from scipy.stats import norm, multivariate_normal
import itertools
from itertools import combinations_with_replacement, combinations, permutations
import copy
import Tasmanian

import bo_methods_lib
from bo_methods_lib.bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Class_fxns import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Classes_plotters import * #Fix this later
import pickle
import gzip

warnings.simplefilter("ignore", category=RuntimeWarning)
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

CPU times: total: 4.17 s
Wall time: 4.81 s


In [2]:
cs_name_val = 2
meth_name_enum = 5
CS_name  = CS_name_enum(cs_name_val)
param_name_str = "y0" #set_param_str(cs_name_val)
indecies_to_consider = set_idcs_to_consider(cs_name_val, param_name_str)
meth_name = Method_name_enum(meth_name_enum)
method = GPBO_Methods(meth_name)

ep0 = 1
ep_enum = Ep_enum(1)
sep_fact = 1.0
normalize = False
noise_mean = 0
noise_std = 0.01
# noise_std = 0.0
kernel = Kernel_enum(1)
lenscl = None
outputscl = 1 #outpulscl tuning is critical for log scaled obj fxns not to terminate early w/ regret (stdv affected)
retrain_GP = 1
reoptimize_obj = 1
bo_iter_tot = 1
bo_run_tot = 1
save_data = False
seed = 5
ei_tol = 1e-6
obj_tol = 1e-6
DateTime = None

num_x_data = 5
gen_meth_x = Gen_meth_enum(2) #Note: Has to be the same for validation and sim data
num_theta_data = 10*len(indecies_to_consider)
num_theta_data_val = 200
gen_meth_theta = Gen_meth_enum(1)
gen_meth_theta_val = Gen_meth_enum(1)
gen_heat_map_data = False

In [3]:
#Get training data
simulator = simulator_helper_test_fxns(CS_name, indecies_to_consider, noise_mean, noise_std, normalize, seed)

#Calculate minimum Muller potential
min_Mul = solve_pyomo_Muller_min(param_name_str, verbose = False)

#Generate Exp Data
exp_data = simulator.gen_exp_data(num_x_data, gen_meth_x)

#Generate Sim Data
sim_data = simulator.gen_sim_data(num_theta_data, num_x_data, gen_meth_theta, gen_meth_x, sep_fact, False)

#Generate sse_sim_data from new sim and exp_data
sim_sse_data = simulator.sim_data_to_sse_sim_data(method, sim_data, exp_data, sep_fact, False)

#Generate validation data
val_data = simulator.gen_sim_data(num_theta_data_val, num_x_data, gen_meth_theta_val, gen_meth_x, sep_fact, True)
val_sse_data = simulator.sim_data_to_sse_sim_data(method, val_data, exp_data, sep_fact, True)

#Set Cs_params and Simulator
cs_name = CS_name.name + "_BO_method_" + meth_name.name + "_sep_fact_" + str(round(sep_fact,2))
cs_params = CaseStudyParameters(cs_name, ep0, sep_fact, normalize, kernel, lenscl, outputscl, retrain_GP, 
                                reoptimize_obj, gen_heat_map_data, bo_iter_tot, bo_run_tot, save_data, DateTime, 
                                seed, ei_tol, obj_tol)

#Initialize Driver
ep_bias = Exploration_Bias(ep0, None, ep_enum, None, None, None, None, None, None, None)
driver = GPBO_Driver(cs_params, method, simulator, exp_data, sim_data, sim_sse_data, val_data, val_sse_data, None, 
                     ep_bias, gen_meth_theta)

In [4]:
#Make emulator
if driver.method.emulator == False:
    all_gp_data = driver.sim_sse_data
    all_val_data = driver.val_sse_data
    gp_emulator = Type_1_GP_Emulator(all_gp_data, all_val_data, None, None, None, driver.cs_params.kernel, 
                                     driver.cs_params.lenscl, driver.simulator.noise_std, driver.cs_params.outputscl, 
                                     driver.cs_params.retrain_GP, driver.cs_params.seed, None, None, None, None)
else:
    all_gp_data = driver.sim_data
    all_val_data = driver.val_data
    gp_emulator = Type_2_GP_Emulator(all_gp_data, all_val_data, None, None, None, driver.cs_params.kernel, 
                                     driver.cs_params.lenscl, driver.simulator.noise_std, driver.cs_params.outputscl, 
                                     driver.cs_params.retrain_GP, driver.cs_params.seed, None, None, None, None)
    
driver.gp_emulator = gp_emulator
#Set train_test data
train_data, test_data = driver.gp_emulator.set_train_test_data(driver.cs_params.sep_fact, driver.cs_params.seed)
        
#Initilize gp model
gp_model = driver.gp_emulator.set_gp_model()
driver.gp_emulator.train_gp(gp_model)

In [8]:
def ei_func(random_var, best_error, y_target, gp_mean, gp_var):
    #Create a mask for values where pred_stdev >= 0 (Here approximation includes domain stdev >= 0) 
    pos_stdev_mask = (gp_var >= 0)

    #Assuming all standard deviations are not zero
    if np.any(pos_stdev_mask):
        #Get indices and values where stdev > 0
        valid_indices = np.where(pos_stdev_mask)[0]
        gp_stdev_val = np.sqrt(gp_var[valid_indices])
        gp_mean_val = gp_mean[valid_indices]
        y_target_val = y_target[valid_indices]
        mean_min_y = y_target_val - gp_mean_val
    
        # Calculate gp_var multiplied by points_p
        gp_stdev_rand_var = gp_stdev_val * random_var
        gp_stdev_rand_var = gp_stdev_val * random_var

        # Calculate the SSE for all data points simultaneously
        sse_temp = np.sum((mean_min_y[:, np.newaxis].T - gp_stdev_rand_var)**2, axis=1)

        # Apply max operator (equivalent to max[(best_error*ep) - SSE_Temp,0])
        improvement = np.maximum(best_error - sse_temp, 0).reshape(-1,1)

        # Calculate EI_temp using vectorized operations
        ei_temp = improvement.flatten()
        # mvn = np.array([multivariate_normal.pdf(random_var[i], mean = np.zeros(len(random_var[i])), cov = np.eye(len(random_var[i]))) 
                        # for i in range(len(random_var))])
        
        # Calculate the multivariate normal pdf for each row in 'epsilon'
        mean_vector = np.zeros(random_var.shape[1])  # Assuming mean is zero for each dimension
        cov_matrix = np.eye(random_var.shape[1])     # Assuming identity covariance matrix

        mvn = multivariate_normal.pdf(random_var, mean=mean_vector, cov=cov_matrix)

        ei_temp = ei_temp*mvn
        print(ei_temp.shape)

    else:
        ei_temp = 0
        
    return ei_temp

In [9]:
def bootstrap(pilot_sample, statistic_function=None, ns=1000000, alpha=0.05, consolidator=lambda dummy: np.mean(dummy,axis=0), seed = seed):
    # pilot_sample has one column per rv, one row per observation
    # alpha is the level of significance; 0.05 for 95% confidence interval
    pilot_sample = np.array(pilot_sample)
    n_obs = pilot_sample.shape[0]
    theta_shape = list(pilot_sample.shape)
    quantiles = np.array([alpha*0.5, 1.0-alpha*0.5])
    from numpy.random import default_rng
    rng = default_rng(int(seed))
    if consolidator is None:
        f1 = statistic_function
        f2 = None
        theta_orig = f1(pilot_sample)
        f1_shape = theta_orig.shape
    elif statistic_function is None:
        f1 = consolidator
        f2 = None
        theta_orig = f1(pilot_sample)
        f1_shape = theta_orig.shape
    else:
        f1 = consolidator
        f2 = statistic_function
        consolidated_orig = f1(pilot_sample)
        f1_shape = consolidated_orig.shape
        theta_orig = f2(consolidated_orig)

    theta_bs = np.zeros(tuple([ns]+list(f1_shape)))

    for ibs in range(ns):
        theta_bs[ibs,...] = f1(pilot_sample[rng.integers(0,n_obs,n_obs)])
    if f2 is not None:
        theta_bs = f2(theta_bs)
    # percentile CI
    CI_percentile = np.quantile(theta_bs, quantiles, 0)

    return theta_orig, theta_bs, CI_percentile


In [10]:
#Test MC integration
def mc_integrate(func, driver, a, b, dim, n = 1000):
    # Monte Carlo integration of given function over domain from a to b (for each parameter)
    # dim: dimensions of function
    #Initialize total ei
    np.random.seed(seed)
    integ_theta = np.zeros(len(driver.gp_emulator.gp_val_data.get_unique_theta()))
    vars = np.zeros(len(driver.gp_emulator.gp_val_data.get_unique_theta()))
    bs_vars = []
    for i in range(len(driver.gp_emulator.gp_val_data.get_unique_theta())):
        #Calcuate best error
        if driver.method.emulator == False:
            #Type 1 best error is inferred from training data 
            best_error, be_theta = driver.gp_emulator.calc_best_error()
            best_errors_x = None
        else:
            #Type 2 best error must be calculated given the experimental data
            best_error, be_theta, best_errors_x = driver.gp_emulator.calc_best_error(driver.method, driver.exp_data)
        #Evaluate GP for validation data
        y_sim = driver.exp_data.y_vals
        gp_mean_all, gp_var_all = driver.gp_emulator.eval_gp_mean_var_val()
        gp_mean = gp_mean_all[i*len(y_sim):i*len(y_sim)+len(y_sim)]
        gp_var = gp_var_all[i*len(y_sim):i*len(y_sim)+len(y_sim)]
        #Get random variable
        random_var = np.random.multivariate_normal(np.zeros(dim), np.eye(dim), n)
        #Calc EI
        ei = func(random_var, best_error, y_sim, gp_mean, gp_var)
        ei_mean = np.average(ei) #y.sum()/len(y)
        vars[i] = 2*np.std(ei)
        domain = np.power(b-a, dim)
        
        #Calc monte carlo integrand for each theta and add it to the total
        integ = domain * ei_mean
        integ_theta[i] = integ

        # Perform bootstrapping
        bootstrap_vars = bootstrap(ei, statistic_function=None, ns=100, alpha=0.05, seed=seed)
        bs_vars.append(bootstrap_vars[-1])

    return integ_theta, vars, np.array(bs_vars)

a = 0
b = 1
n= 1000
# a = -3.668470846559581
# b = 3.668470846559581
# n = 115813
#Fill in f_args with data from a run
ei_mc, vars, ci = mc_integrate(ei_func, driver, a, b, len(driver.exp_data.y_vals), n = n)
print(ei_mc[ei_mc > 0])
# print(vars)
print(ci[(ci > 0).all(axis=1)])

(1000,)
(1000,)


KeyboardInterrupt: 

In [None]:
#Test Sparse Grid Integration
#Calcuate best error
if driver.method.emulator == False:
    #Type 1 best error is inferred from training data 
    best_error_metrics = driver.gp_emulator.calc_best_error()
    best_errors_x = None
else:
    #Type 2 best error must be calculated given the experimental data
    best_error_metrics = driver.gp_emulator.calc_best_error(driver.method, driver.exp_data)
#Set be in ep bias class
driver.ep_bias.best_error = best_error_metrics[0]
driver.ep_bias.set_ep()
#Calculate EI for validation data
if driver.method.emulator == False:
    ei_output = driver.gp_emulator.eval_ei_val(driver.exp_data, driver.ep_bias, best_error_metrics)
else:
    ei_output = driver.gp_emulator.eval_ei_val(driver.exp_data, driver.ep_bias, best_error_metrics, driver.method)

ei_sparse = ei_output[0]
print(ei_sparse)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [None]:
print(ei_sparse[-1]/ei_mc[-1])
print(ei_sparse[-4]/ei_mc[-4])

nan
nan
