## Import Dependencies

In [1]:
%%time

import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
import itertools
from itertools import combinations_with_replacement, combinations, permutations

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set, clean_1D_arrays
from bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.GPBO_Class_fxns import * #Fix this later

  from .autonotebook import tqdm as notebook_tqdm


CPU times: user 2.35 s, sys: 317 ms, total: 2.67 s
Wall time: 2.7 s


## Define Method and DateTime

In [2]:
# Build the run timestamp and the GPBO method object used by the rest of the notebook.
#Set Date and Time
dateTimeObj = datetime.now()
# NOTE(review): timestampStr is only referenced by the commented-out print below.
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S)")
# print("Date and Time: ", timestampStr)
# DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M-%S%p")
DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M")
# NOTE(review): the next line discards the timestamp computed above, so every later use of
# DateTime (e.g. the CaseStudyParameters call) receives None. Remove it for real runs.
DateTime = None ##For Testing

#Generate Method (except last part)
# Method_name_enum(2) is reported as method "B1" in the saved output of this cell.
meth_name = Method_name_enum(2)
method = GPBO_Methods(meth_name)

# Report the settings derived from the method name (attributes are read directly from `method`).
print("Method Name: ", method.method_name.name)
print("Emulator?: ", method.emulator)
print("Obj Func: ", method.obj.name)
print("Sparse Grid?: ", method.sparse_grid)

Method Name:  B1
Emulator?:  False
Obj Func:  LN_OBJ
Sparse Grid?:  False


## Define Case Study Simulator & Functions

In [3]:
def simulator_helper_test_fxns(cs_name, indecies_to_consider):
    """
    Builds the Simulator object for a case study based off of the case study identifier.

    Parameters
    ----------
    cs_name: Class, The name/enumerator associated with the case study being evaluated. Only cs_name.value is read
    indecies_to_consider: list of int, Forwarded unchanged to Simulator (presumably the indices of theta_ref that are treated as unknown; confirm against Simulator)

    Returns
    -------
    simulator: Simulator, built from (dim_x, indecies_to_consider, theta_ref, theta_names, calc_y_fxn) of the chosen case study

    Raises
    ------
    ValueError: If cs_name.value is not one of the supported case studies (1 or 2)
    """
    #Note: Add your function name from GPBO_Class_fxns.py here
    if cs_name.value == 1:
        #Case study 1: 1D x, two parameters
        dim_x = 1
        theta_ref = np.array([1, -1])
        theta_names = ['theta_1', 'theta_2']
        calc_y_fxn = calc_cs1_polynomial

    elif cs_name.value == 2:
        #Case study 2: 2D x, 24 parameters (6 groups of 4: A, a, b, c, x0, x1)
        dim_x = 2
        theta_ref = np.array([-200,-100,-170,15,-1,-1,-6.5,0.7,0,0,11,0.6,-10,-10,-6.5,0.7,1,0,-0.5,-1,0,0.5,1.5,1])
        theta_names = ['A_1', 'A_2', 'A_3', 'A_4', 'a_1', 'a_2', 'a_3', 'a_4', 'b_1', 'b_2', 'b_3', 'b_4', 'c_1',
                       'c_2', 'c_3', 'c_4', 'x0_1', 'x0_2', 'x0_3', 'x0_4', 'x1_1', 'x1_2', 'x1_3', 'x1_4']
        calc_y_fxn = calc_muller

    else:
        #Report the value actually received so an unsupported case study is easy to diagnose
        raise ValueError(
            "Unsupported case study: cs_name.value = {!r}. Supported values are 1 and 2.".format(cs_name.value)
        )

    return Simulator(dim_x,
                     indecies_to_consider,
                     theta_ref,
                     theta_names,
                     calc_y_fxn)

#Note: for a general case, also define the model function itself (example below):
# def calc_muller(model_coefficients, x):
#     """
#     Calculates the Muller Potential
    
#     Parameters
#     ----------
#         model_coefficients: ndarray, The array containing the values of Muller constants
#         x: ndarray, Values of X
#         noise: ndarray, Any noise associated with the model calculation
    
#     Returns:
#     --------
#         y_mul: float, value of Muller potential
#     """
#     #Reshape x to matrix form
#     x = vector_to_1D_array(x) 
#     assert x.shape[0] == 2, "Muller Potential x_data must be 2 dimensional"
#     X1, X2 = x #Split x into 2 parts by splitting the rows
    
#     #Separate all model parameters into their appropriate pieces
#     model_coefficients_reshape = model_coefficients.reshape(6, 4)
        
#     #Calculate Muller Potential
#     A, a, b, c, x0, y0 = model_coefficients_reshape
#     term1 = a*(X1 - x0)**2
#     term2 = b*(X1 - x0)*(X2 - y0)
#     term3 = c*(X2 - y0)**2
#     y_mul = np.sum(A*np.exp(term1 + term2 + term3) )
    
#     return y_mul

## Define Case Study

In [4]:
# Configuration for case study 2: choose the case study, set the run options and build the
# CaseStudyParameters and Simulator objects used by the following cells.
cs_name  = CS_name_enum(2)
print("CS Name: ", cs_name.name)

# Indices 4..11 (8 values) of the 24-entry parameter vector defined in simulator_helper_test_fxns.
indecies_to_consider = list(range(4, 12)) #This is what changes for different subproblems of CS2

# Run options. Most are forwarded positionally to CaseStudyParameters below; their exact
# meaning is defined by that class in bo_methods_lib.
ep0 = 1
sep_fact = 1.0
normalize = False
num_x_data = 27
num_theta_data = 20
# NOTE(review): lhs_gen_theta is not referenced by any of the CS2 cells shown in this notebook.
lhs_gen_theta = True
eval_all_pairs = False
package = "scikit_learn"
noise_mean = 0
noise_std = 0.01
kernel = "Mat_52"
set_lenscl = 1
outputscl = False
retrain_GP = 2
GP_train_iter = 300
bo_iter_tot = 3
bo_run_tot = 2
save_fig = False
save_data = False
# NOTE(review): num_data is not referenced by any cell shown in this notebook.
num_data = None
seed = 1

# DateTime comes from the "Define Method and DateTime" cell (currently None there).
CS = CaseStudyParameters(cs_name, ep0, sep_fact, normalize, eval_all_pairs, package, noise_mean, noise_std, kernel, 
                         set_lenscl, outputscl, retrain_GP, GP_train_iter, bo_iter_tot, bo_run_tot, save_fig, 
                         save_data, DateTime, seed)

# The helper returns a fully configured Simulator for the selected case study.
simulator = simulator_helper_test_fxns(cs_name, indecies_to_consider)

CS Name:  CS2


## Generate Experimental X and Y Data (If not given)

In [5]:
# Lower/upper bounds: two entries for x and eight for the parameters
# (presumably one per entry of indecies_to_consider; confirm against gen_exp_data).
bounds_x_l = [-1.5, -0.5]
bounds_x_u = [1, 2]
bounds_p_l = [-2, -2, -10, -2, -2, -2,  5, -2]
bounds_p_u = [ 2,  2,   0,  2,  2,  2, 15,  2]
# NOTE(review): which generation method Gen_meth_enum(1) selects is defined in bo_methods_lib.
gen_meth_x = Gen_meth_enum(1)

# exp_data = Data(true_params, x_data, None, None, None, None, None, None)
driver = GPBO_Driver(CS, simulator, None, None)
# Note the interleaved argument order: theta lower, x lower, theta upper, x upper.
exp_data = driver.gen_exp_data(bounds_p_l, bounds_x_l, bounds_p_u, bounds_x_u, num_x_data, gen_meth_x)

# print(exp_data.y_vals)

## Normalize Experimental X Data (optional)

In [6]:
# Normalize the true parameters and the experimental x values with the bounds stored on the data object.
# NOTE(review): this is an alias, not a copy. scaled_exp_data and exp_data are the same object,
# so the two attribute assignments below also overwrite exp_data.theta_vals / exp_data.x_vals,
# and re-running this cell normalizes already-normalized x values. If an unscaled exp_data is
# needed later, build an independent copy here instead (e.g. copy.deepcopy) - confirm intent.
scaled_exp_data = exp_data
bounds_p = scaled_exp_data.bounds_theta
bounds_x = scaled_exp_data.bounds_x

# The normalized true parameters (simulator.theta_true) are stored in theta_vals.
scaled_exp_data.theta_vals = scaled_exp_data.normalize(simulator.theta_true, bounds_p)
scaled_exp_data.x_vals = scaled_exp_data.normalize(scaled_exp_data.x_vals, bounds_x)

# print(scaled_exp_data.theta_true)
# print(scaled_exp_data.x_vals)

## Generate Simulation Theta Data

In [10]:
# Generate the simulation (training) data through the driver and report its size.
# NOTE(review): this cell's execution count (10) is out of order with its neighbours, and its
# saved output contains an array that the current code cannot print (the print of
# sim_data.y_vals is commented out). Re-run the notebook top to bottom to refresh the outputs.
#Generate and store data as a method in the driver class (which creates an instance of the data class and adds data to it)
# Define the lower and upper bounds (define separately)
# bounds_p = np.array([[-2, -2, -10, -2, -2, -2,  5, -2],
#                     [ 2,  2,   0,  2,  2,  2, 15,  2]])

# sim_data = Data(None, true_params, x_data, None, None, None, None, None, None)
# sim_data.theta_vals = clean_1D_arrays(driver.create_param_data(num_theta_data, bounds_p, gen_meth))
# Flag forwarded to gen_sim_data (by its name, reuse the experimental x values - confirm in the driver).
share_x_from_exp = True
# gen_meth_x repeats the value already assigned in the experimental-data cell.
gen_meth_x = Gen_meth_enum(1)
gen_meth_theta = Gen_meth_enum(1)
sim_data = driver.gen_sim_data(num_theta_data, gen_meth_theta, num_x_data, gen_meth_x, share_x_from_exp)

#Set number and dimension of training data
num_gp_data = sim_data.get_num_gp_data(method)
dim_gp_data = sim_data.get_dim_gp_data(method)
num_theta_calc = sim_data.get_num_theta()

# print(sim_data.y_vals)
print(num_gp_data, dim_gp_data, num_theta_calc)

[ -84.74179267 -103.08066863 -102.44899132  -43.42396972   -5.78770289
 -114.39984015  -77.68534664  -51.11304856  -94.58183093  -36.61857691
 -110.82270192  -81.67258385 -152.38176931  -88.81419054 -109.4200563
  -13.41055178 -183.57957448 -115.32985953  -86.18090791 -160.16603728
 -108.69360077 -121.58699512 -142.6053061   -10.29621668  -71.1341678
 -103.0576797   -31.72490656]
20 8 20


## Generate y_sim, sse, or log(sse) data (Run Simulations)

In [8]:
#Create y_sim data based on method, data, and case study
# The simulator output is stored back on sim_data so later cells read it from sim_data.y_vals.
# NOTE(review): exp_data may already have been modified in place by the normalization cell
# (scaled_exp_data is an alias of exp_data there) - confirm that is what create_sim_data expects.
y_sim = simulator.create_sim_data(method, CS, sim_data, exp_data)
sim_data.y_vals = y_sim
# print(y_sim)

## Augmenting Training Data (Skipped Steps)

In [14]:
# Build a one-point Data object at the true parameters and append it to the training data.
# reshape(1,-1) turns the true parameter vector into a single-row 2D array.
best_theta = Data(simulator.theta_true.reshape(1,-1), sim_data.x_vals, bounds_p_l, bounds_x_l, bounds_p_u, bounds_x_u, None, None, None, None, None)
best_theta.y_vals = driver.create_y_exp_data(best_theta)

# Sizes before augmentation.
# NOTE(review): the saved output shows 20 theta rows but 27 y values here, and 21 / 54 afterwards.
# Combined with the out-of-order execution counts, sim_data.y_vals may not come from the
# "Run Simulations" cell - verify after a clean Restart & Run All.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

sim_data = driver.augment_train_data(sim_data, best_theta) #Best error in driver class

# Sizes after augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

20 27
21 54


# Case Study 1 Test

In [None]:
# Configuration for case study 1, written to match the calls used by the (executed) case study 2
# cells above: the 19-argument CaseStudyParameters call, the Simulator built through
# simulator_helper_test_fxns, and GPBO_Methods constructed from the method name alone.
cs_name  = CS_name_enum(1)

#Want to make this all one array. I think that makes more sense
# Case study 1 has two parameters (theta_1, theta_2), so only indices 0 and 1 exist.
indecies_to_consider = list(range(0, 2))
ep0 = 1
sep_fact = 1.0
normalize = False
num_x_data = 5
dim_x = 1
num_theta_data = 20
lhs_gen_theta = True
eval_all_pairs = False
package = "scikit_learn"
noise_mean = 0
noise_std = 0.01
kernel = "Mat_52"
set_lenscl = 1
outputscl = False
retrain_GP = 2
GP_train_iter = 300
bo_iter_tot = 3
bo_run_tot = 2
save_fig = False
save_data = False
x_data_vals = np.linspace(-2,2,num_x_data)
num_data = None
# Kept for the later cells; the Simulator below gets its model function from the helper.
calc_y_fxn = None
#For a general case, use calc_y_fxn = calc_cs1_polynomial
seed = 1

# Same positional arguments as the case study 2 cell. The previous version passed three extra
# leading arguments (true_params, true_model_coefficients, param_dict) that are not defined
# at this point in the notebook.
CS = CaseStudyParameters(cs_name, ep0, sep_fact, normalize, eval_all_pairs, package, noise_mean, noise_std, kernel,
                         set_lenscl, outputscl, retrain_GP, GP_train_iter, bo_iter_tot, bo_run_tot, save_fig,
                         save_data, DateTime, seed)

# The helper supplies dim_x, theta_ref, theta_names and the model function for the case study.
simulator = simulator_helper_test_fxns(cs_name, indecies_to_consider)

#Generate Method (except last part)
# meth_name comes from the "Define Method and DateTime" cell; the constructor populates
# emulator / obj / sparse_grid itself (they are read straight after construction in that cell).
method = GPBO_Methods(meth_name)

print("CS Name: ", cs_name.name)
print("Method Name: ", method.method_name.name)
print("Emulator?: ", method.emulator)
print("Obj Func: ", method.obj.name)
print("Sparse Grid?: ", method.sparse_grid)

In [None]:
# Set the true parameters and the model function on the case study 1 simulator.
#Set true parameters
# NOTE(review): the case study 2 cells read the true parameters from simulator.theta_true instead;
# confirm that Simulator.set_true_params still exists.
true_params, true_param_names = simulator.set_true_params(CS)
CS.true_params = true_params

#Set calc model
# NOTE(review): simulator_helper_test_fxns is defined above with two required parameters
# (cs_name, indecies_to_consider) and returns a Simulator, not a model function. This
# one-argument call raises TypeError, and assigning its result to simulator.calc_y_fxn
# would not store a callable model.
calc_model_fxn = simulator_helper_test_fxns(cs_name)
simulator.calc_y_fxn = calc_model_fxn

#For a general use:
# calc_model_fxn = calc_muller
# simulator.calc_y_fxn = calc_model_fxn

In [None]:
# Create the driver and the experimental data container for case study 1.
# NOTE(review): the case study 2 cell constructs GPBO_Driver with four arguments
# (CS, simulator, None, None); confirm the two-argument form is still accepted.
driver = GPBO_Driver(CS, simulator)
# 2 x 1 array: first row lower bound, second row upper bound of the single x dimension.
bounds_x = np.array([[-2], [2]])
gen_meth = Gen_meth_enum(2)
x_data = x_data_vals
# NOTE(review): Data is called with 9 arguments here but with 11 in the case study 2
# "Augmenting Training Data" cell (theta, x, four bounds, five None) - confirm the signature.
exp_data = Data(None, true_params, x_data, None, None, None, None, None, None)
# print(x_data, x_data.shape)

In [None]:
#CS1
bounds_p = np.array([[-2, -2],
                    [ 2,  2]])
scaled_exp_data = Data(None, true_params, x_data, None, None, None, None, None, None)
scaled_exp_data.theta_true = scaled_exp_data.normalize(true_params, bounds_p)
scaled_exp_data.x_data = scaled_exp_data.normalize(x_data, bounds_x)

# print(scaled_exp_data.theta_true)
# print(scaled_exp_data.x_data)

In [None]:
# Compute the experimental y values with the driver and store them on exp_data.
#Best way to differentiate between case studies? Name? How to name appropriately?
#Note this is a new way of defining the function for CS2 which I think will be better suited to our needs.
y_exp = driver.create_y_exp_data(exp_data)
exp_data.y_vals = y_exp
# print(exp_data.y_vals)

In [None]:
# Create the simulation data container for case study 1 and fill in its parameter sets.
# Define the lower and upper bounds
# NOTE(review): the case study 2 cell obtains sim_data from driver.gen_sim_data(...) and only
# keeps this Data(...) / create_param_data(...) form as commented-out code - confirm that
# create_param_data and the 9-argument Data call are still supported.
sim_data = Data(None, true_params, x_data, None, None, None, None, None, None)
sim_data.theta_vals = clean_1D_arrays(driver.create_param_data(num_theta_data, bounds_p, gen_meth))

#Set number and dimensions of training data
num_gp_data = sim_data.get_num_gp_data(method)
dim_gp_data = sim_data.get_dim_gp_data(method)
num_theta_calc = sim_data.get_num_theta()

print(num_gp_data, dim_gp_data, num_theta_calc)

In [None]:
#Create y_sim data based on method, data, and case study
# Same call as in the case study 2 section; the result is stored back on sim_data.
y_sim = simulator.create_sim_data(method, CS, sim_data, exp_data)
sim_data.y_vals = y_sim
# print(y_sim)

In [None]:
# Build a one-point Data object at the true parameters and append it to the training data.
# reshape(1,-1) turns the true parameter vector into a single-row 2D array.
# NOTE(review): 9-argument Data call; the case study 2 cell uses 11 arguments.
best_theta = Data(true_params.reshape(1,-1), true_params, x_data, None, None, None, None, None, None)
# NOTE(review): the case study 2 cell fills best_theta.y_vals with driver.create_y_exp_data(best_theta);
# here simulator.create_sim_data is used instead - confirm which one is intended.
best_theta.y_vals = simulator.create_sim_data(method, CS, best_theta, exp_data)

# Sizes before augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

sim_data = driver.augment_train_data(sim_data, best_theta)

# Sizes after augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))