## Import Dependencies

In [1]:
%%time

import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
import itertools
from itertools import combinations_with_replacement, combinations, permutations

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set, clean_1D_arrays
from bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.GPBO_Class_fxns import * #Fix this later

  from .autonotebook import tqdm as notebook_tqdm


CPU times: user 2.35 s, sys: 302 ms, total: 2.65 s
Wall time: 2.68 s


## Define Method and DateTime

In [2]:
#Generate Method (except last part)
# Method_name_enum(3) is reported as "A2" in this cell's recorded output
meth_name = Method_name_enum(3)
method = GPBO_Methods(meth_name)

# Echo the attributes of the selected method that later cells depend on
print("Method Name: ", method.method_name.name)
print("Emulator?: ", method.emulator)
print("Obj Func: ", method.obj.name)
print("Sparse Grid?: ", method.sparse_grid)

#Set Date and Time
dateTimeObj = datetime.now()
# Human-readable timestamp; in the visible cells it is only used by the commented-out print below
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S)")
# print("Date and Time: ", timestampStr)
# DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M-%S%p")
# Path-like timestamp (year/month/day/hour-minute) later handed to CaseStudyParameters
DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M")
# NOTE: this immediately overrides the value above, so DateTime is None for the rest of the notebook
DateTime = None ##For Testing

# Integer value of the selected method enum (3 in the recorded output)
print(method.method_name.value)

Method Name:  A2
Emulator?:  True
Obj Func:  OBJ
Sparse Grid?:  False
3


## Define Case Study Simulator & Functions

In [3]:
def simulator_helper_test_fxns(cs_name, indecies_to_consider, noise_mean, noise_std, case_study_parameters):
    """
    Builds the Simulator for the case study identified by cs_name.

    The case study selects the reference parameter values, parameter names, x and theta
    bounds, and the function used to calculate y. Everything else is forwarded unchanged.

    Parameters
    ----------
    cs_name: Class, The name/enumerator associated with the case study being evaluated.
        Only cs_name.value is read; supported values are 1 and 2
    indecies_to_consider: Forwarded unchanged as the first argument of Simulator
    noise_mean: Forwarded unchanged to Simulator
    noise_std: Forwarded unchanged to Simulator
    case_study_parameters: Forwarded unchanged to Simulator

    Returns
    -------
    simulator: Simulator, the Simulator instance configured for case study cs_name

    Raises
    ------
    ValueError: If cs_name.value is not one of the supported case studies (1 or 2)
    """
    #Note: Add your function name from GPBO_Class_fxns.py here
    if cs_name.value == 1:
        # Case study 1: 2 parameters, 1 x dimension, y computed by calc_cs1_polynomial
        theta_ref = np.array([1.0, -1.0])
        theta_names = ['theta_1', 'theta_2']
        bounds_x_l = [-2]
        bounds_x_u = [2]
        bounds_theta_l = [-2, -2]
        bounds_theta_u = [ 2,  2]
        calc_y_fxn = calc_cs1_polynomial

    elif cs_name.value == 2:
        # Case study 2: 24 parameters, 2 x dimensions, y computed by calc_muller
        theta_ref = np.array([-200,-100,-170,15,-1,-1,-6.5,0.7,0,0,11,0.6,-10,-10,-6.5,0.7,1,0,-0.5,-1,0,0.5,1.5,1])

        theta_names = ['A_1', 'A_2', 'A_3', 'A_4', 'a_1', 'a_2', 'a_3', 'a_4', 'b_1', 'b_2', 'b_3', 'b_4', 'c_1',
                       'c_2', 'c_3', 'c_4', 'x0_1', 'x0_2', 'x0_3', 'x0_4', 'x1_1', 'x1_2', 'x1_3', 'x1_4']
        bounds_x_l = [-1.5, -0.5]
        bounds_x_u = [1, 2]
        bounds_theta_l = [-300,-200,-250, 5,-2,-2,-10, -2, -2,-2,5,-2,-20,-20, -10,-1 ,-2,-2,-2, -2,-2,-2,0,-2]
        bounds_theta_u = [-100,  0, -150, 20,2, 2, 0,  2,  2,  2, 15,2, 0,0   , 0,  2, 2,  2, 2, 2 ,2 , 2, 2,2]
        calc_y_fxn = calc_muller

    else:
        # Name the argument the caller actually passed and report the rejected value
        raise ValueError(f"cs_name.value must be 1 or 2, got {cs_name.value!r}")

    # Argument order (theta lower, x lower, theta upper, x upper) is kept exactly as before
    return Simulator(indecies_to_consider,
                     theta_ref,
                     theta_names,
                     bounds_theta_l,
                     bounds_x_l,
                     bounds_theta_u,
                     bounds_x_u,
                     noise_mean,
                     noise_std,
                     case_study_parameters,
                     calc_y_fxn)

## Define Case Study

In [4]:
# Select the case study; CS_name_enum(1) is reported as "CS1" in this cell's recorded output
cs_name  = CS_name_enum(1)
print("CS Name: ", cs_name.name)

# indecies_to_consider = list(range(4, 12)) #This is what changes for different subproblems of CS2
indecies_to_consider = list(range(0, 2)) #This is what changes for different subproblems of CS1

# --- Run configuration -------------------------------------------------------
# ep0 and ep_enum are passed to Exploration_Bias in a later cell; ep0 also goes to CaseStudyParameters
ep0 = 1
ep_enum = Ep_enum(1)
# The next three are passed to CaseStudyParameters below; their exact semantics live in bo_methods_lib
sep_fact = 0.8
normalize = False
# Not referenced again in the visible cells
lhs_gen_theta = True
eval_all_pairs = False
# Noise settings forwarded to the Simulator through simulator_helper_test_fxns
noise_mean = 0
noise_std = 0.01
# GP settings consumed by driver.gen_emulator in a later cell
# (Kernel_enum(1) prints as Kernel_enum.MAT_52 in a later recorded output)
kernel = Kernel_enum(1)
lenscl = None
outputscl = 1
retrain_GP = 0
# Not referenced again in the visible cells
GP_train_iter = 300
# Iteration/run counts and output switches passed to CaseStudyParameters
bo_iter_tot = 3
bo_run_tot = 2
save_fig = False
save_data = False
# Not referenced again in the visible cells
num_data = None
seed = 1

# Bundle the settings; DateTime comes from the "Define Method and DateTime" cell (None while testing)
cs_params = CaseStudyParameters(cs_name, ep0, sep_fact, normalize, eval_all_pairs, bo_iter_tot, bo_run_tot, 
                         save_fig, save_data, DateTime, seed)

# Build the Simulator for the selected case study
simulator = simulator_helper_test_fxns(cs_name, indecies_to_consider, noise_mean, noise_std, cs_params)

CS Name:  CS1


## Generate Experimental X and Y Data (If not given)

In [5]:
#Test gen_exp_data
# Number of x points requested for the experimental data set
num_x_data = 5
# The same gen_meth_x is reused for the simulation and validation data in later cells
gen_meth_x = Gen_meth_enum(2) #Note: Has to be the same for validation and sim data
exp_data = simulator.gen_exp_data(num_x_data, gen_meth_x)

## Generate Simulation Training/Testing Theta and y Data

In [6]:
# Number of theta sets requested for the simulation (training/testing) data
num_theta_data = 100
# Theta uses Gen_meth_enum(1) while x uses Gen_meth_enum(2); what each value means is defined in bo_methods_lib
gen_meth_theta = Gen_meth_enum(1)
sim_data = simulator.gen_sim_data(num_theta_data, num_x_data, gen_meth_theta, gen_meth_x)
# Recorded output: 500 (consistent with 100 theta sets x 5 x points — TODO confirm)
print(len(sim_data.theta_vals))
# Derive the SSE-form data set from the simulation data and the experimental data
sim_sse_data = simulator.sim_data_to_sse_sim_data(method, sim_data, exp_data)
# Recorded output: 100
print(len(sim_sse_data.theta_vals))

500
100


## Generate Validation Data

In [7]:
# TODO (open question from the author): how to give validation data a different seed
num_theta_data_val = 11 #How to make the seed different for generating validation data?
# Validation theta uses Gen_meth_enum(2), unlike the training theta which uses Gen_meth_enum(1)
gen_meth_theta_val = Gen_meth_enum(2)
# The trailing True is an extra positional flag not passed for the training data
# (presumably marks the set as validation data — TODO confirm against gen_sim_data)
val_data = simulator.gen_sim_data(num_theta_data_val, num_x_data, gen_meth_theta_val, gen_meth_x, True)
val_sse_data = simulator.sim_data_to_sse_sim_data(method, val_data, exp_data, True)

# Show at most the first 10 x rows; the recorded output has 5 rows: -2, -1, 0, 1, 2
print(val_sse_data.x_vals[0:10])

[[-2.]
 [-1.]
 [ 0.]
 [ 1.]
 [ 2.]]


## Create Normalized Data (optional)

In [8]:
#What is the best way to be building normalized data? Is it ok to write a method in a class that calls itself?
# The scaled_* results are not referenced again in the visible cells; the driver below is given the unscaled data
scaled_exp_data = exp_data.norm_feature_data()
scaled_sim_data = sim_data.norm_feature_data()
scaled_val_data = val_data.norm_feature_data()

#Q: How do I best automate which one of these to use? Where would I automate this?
#A: GPBO_Driver

## Initialize Driver

In [9]:
# Assemble the driver from the settings, method, simulator and the unscaled data sets built above
driver = GPBO_Driver(cs_params, method, simulator, exp_data, sim_data, sim_sse_data, val_data, val_sse_data)

## Build GP Emulator Class

In [10]:
# Build the GP emulator from the kernel settings defined in the case-study configuration cell
gp_emulator = driver.gen_emulator(kernel, lenscl, outputscl, retrain_GP)
# Recorded output: 500, the same count as len(sim_data.theta_vals) printed earlier
print(gp_emulator.get_num_gp_data())

500


## Generate, Shuffle, and Split Training Data

In [11]:
#Q: Need this to get training indecies based on emulator status
#A: Call train test split in driver
# Returns the training and testing data sets; cs_params supplies the split settings
train_data, test_data = gp_emulator.set_train_test_data(cs_params)

# print(train_data.theta_vals)

## Set GP Model

In [12]:
# Create the GP model object that is trained in the next cell
gp_model = gp_emulator.set_gp_model()
# Recorded output: Kernel_enum.MAT_52 (the kernel selected via Kernel_enum(1))
print(gp_emulator.kernel)

Kernel_enum.MAT_52


## Train GP Model

In [13]:
# Train the model built in the previous cell; the next cell reads gp_emulator.trained_hyperparams
gp_emulator.train_gp(gp_model)

In [14]:
# Recorded output is a 3-item list: an array of 3 floats, then 0.0001, then 1
# (presumably lengthscales, noise and output scale — TODO confirm against train_gp)
print(gp_emulator.trained_hyperparams)

[array([5.15230667, 2.93967812, 0.68698035]), 0.0001, 1]


## Evaluate GP Model Mean and Variance

In [15]:
# Evaluate the GP mean and variance; the commented lines below read the same
# quantities from gp_emulator.gp_val_data at index 91 as a spot check
gp_mean, gp_var = gp_emulator.eval_gp_mean_var()
# print(gp_emulator.gp_val_data.theta_vals[91])
# print(gp_emulator.gp_val_data.gp_mean[91])
# print(gp_emulator.gp_val_data.gp_var[91])

## Evaluate GP SSE and SSE_var

In [16]:
# Two call forms exist: no arguments for "Type 1" (kept commented out),
# and the exp_data form used here
# sse, sse_var = gp_emulator.eval_gp_sse_var() # For Type 1
sse, sse_var = gp_emulator.eval_gp_sse_var(exp_data) #Make private method?

In [17]:
# print(val_data_true.sse)
# Spot check of a single SSE value; index 91 matches the commented-out checks in the mean/variance cell
print(gp_emulator.gp_val_data.sse[91])
# print(gp_emulator.gp_val_data.sse_var[91])

1.7764695523823395


## Calculate Best Error

In [18]:
# As with eval_gp_sse_var, the no-argument "Type 1" form is kept commented out
# best_error = gp_emulator.calc_best_error() #For Type 1
# best_error is passed to eval_gp_ei in the EI cell
best_error = gp_emulator.calc_best_error(exp_data) #Make private method?

In [19]:
# Recorded output: 3.1560643225167166
print(best_error)

3.1560643225167166


## Calculate Current Exploration Bias

In [20]:
# Only ep0 (1st argument) and ep_enum (3rd argument) are supplied; the other six positional arguments are None
ep_bias = Exploration_Bias(ep0, None, ep_enum, None, None, None, None, None)
# set_ep() is called before ep_curr is read
ep_bias.set_ep()
# Recorded output: 1, which equals ep0
print(ep_bias.ep_curr)

1


## Calculate EI

In [21]:
# The "Type 1" form (kept commented out) takes cs_params and no method argument
# val_ei = gp_emulator.eval_gp_ei(cs_params, ep_bias, best_error) #Type 1
# The next cell reads the EI values from gp_emulator.gp_val_data.ei
val_ei = gp_emulator.eval_gp_ei(exp_data, ep_bias, best_error, method)

In [22]:
# Largest EI value, ignoring NaN entries (recorded output: 14.044783401527486)
print(np.nanmax(gp_emulator.gp_val_data.ei))
# Prints the full EI array
print(gp_emulator.gp_val_data.ei)

14.044783401527486
[ 3.15239808  3.15517871  3.15567124  3.15547244  3.15493923  3.15501834
  6.75501363  6.58931419  5.67103832  6.15403483  6.30664435  5.2557153
  3.1558716   3.15587671  3.15574428  3.15567491  5.45854333  7.84952597
  5.65925202  6.14600502  6.31166262  6.14929762  7.83925881  3.76291372
  3.15587482  3.15582584  3.73552994  7.8571622   7.37759087  6.14678623
  6.31165237  6.15690413  5.67838799  8.70812687  6.61396055  3.77599874
  3.74328197  6.58704722  8.65991116  6.14806245  6.311728    6.15437289
  5.6784328   4.88416235  7.95673783  8.6740195   5.4801502   5.49038314
  8.66577037  7.85864307  6.3117351   6.15471768  5.67709107  4.88021464
  3.77000028  6.31087408  9.74500961  9.06350964  9.12307976  9.76298039
  6.3117228   6.15501318  5.67831943  4.88217441  3.76422071  3.1555621
  6.74833079  9.76388148 11.64158933 11.66988667  9.75831962  6.77274699
  5.67890917  4.88306149  3.76803736  3.15587807  3.15558172  7.37038635
  8.65432868 14.03878595 13.992237

## Optimize EI over theta

## Augmenting Training Data (Skipped Steps)

# Case Study 1 Test

In [23]:
#Write Later