## Import Dependencies

In [1]:
%%time

import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
import itertools
from itertools import combinations_with_replacement, combinations, permutations

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set, clean_1D_arrays
from bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.GPBO_Class_fxns import * #Fix this later
import pickle

  from .autonotebook import tqdm as notebook_tqdm


CPU times: user 2.39 s, sys: 279 ms, total: 2.67 s
Wall time: 2.67 s


## Define Method and DateTime

## Define Case Study Simulator & Functions

In [2]:
def simulator_helper_test_fxns(cs_name, indecies_to_consider, noise_mean, noise_std, case_study_parameters):
    """
    Sets the model for calculating y based off of the case study identifier.

    Parameters
    ----------
    cs_name: Class, The name/enumerator associated with the case study being evaluated

    Returns
    -------
    calc_y_fxn: function, the function used for calculation is case study cs_name.name
    """
    #Note: Add your function name from GPBO_Class_fxns.py here
    if cs_name.value == 1:      
        theta_names = ['theta_1', 'theta_2']
        bounds_x_l = [-2]
        bounds_x_u = [2]
        bounds_theta_l = [-2, -2]
        bounds_theta_u = [ 2,  2]
        theta_ref = np.array([1.0, -1.0])     
        calc_y_fxn = calc_cs1_polynomial
        
    elif cs_name.value == 2:                          
        theta_names = ['A_1', 'A_2', 'A_3', 'A_4', 'a_1', 'a_2', 'a_3', 'a_4', 'b_1', 'b_2', 'b_3', 'b_4', 'c_1', 
                       'c_2', 'c_3', 'c_4', 'x0_1', 'x0_2', 'x0_3', 'x0_4', 'x1_1', 'x1_2', 'x1_3', 'x1_4']
        bounds_x_l = [-1.5, -0.5]
        bounds_x_u = [1, 2]
        bounds_theta_l = [-300,-200,-250, 5,-2,-2,-10, -2, -2,-2,5,-2,-20,-20, -10,-1 ,-2,-2,-2, -2,-2,-2,0,-2]
        bounds_theta_u = [-100,  0, -150, 20,2, 2, 0,  2,  2,  2, 15,2, 0,0   , 0,  2, 2,  2, 2, 2 ,2 , 2, 2,2]
        theta_ref = np.array([-200,-100,-170,15,-1,-1,-6.5,0.7,0,0,11,0.6,-10,-10,-6.5,0.7,1,0,-0.5,-1,0,0.5,1.5,1])      
#         theta_ref = np.array([0.5, 0.5, 0.8, 2/3, 0.25, 0.25, 0.35, 0.675, 0.5, 0.5, 0.6, 0.65, 0.5, 0.5, 0.35, 28333/50000, 0.75, 0.5,
#     0.375, 0.25, 0.5, 0.625, 0.75, 0.75])
        calc_y_fxn = calc_muller
        
    else:
        raise ValueError("self.CaseStudyParameters.cs_name.value must exist!")

    return Simulator(indecies_to_consider, 
                     theta_ref,
                     theta_names,
                     bounds_theta_l, 
                     bounds_x_l, 
                     bounds_theta_u, 
                     bounds_x_u, 
                     noise_mean,
                     noise_std,
                     case_study_parameters,
                     calc_y_fxn)

## Define Case Study

## Generate Experimental X and Y Data (If not given)

## Generate Simulation Training/Testing Theta and y Data

## Generate Validation Data

## Create Normalized Data (optional)

In [3]:
#What is the best way to be building normalized data? Is it ok to write a method in a class that calls itself?
# scaled_exp_data = exp_data.norm_feature_data()
# scaled_sim_data = sim_data.norm_feature_data()
# scaled_val_data = val_data.norm_feature_data()
    
# #Q: How do I best automate which one of these to use? Where would I automate this?
# #A: GPBO_Driver
# print(scaled_exp_data.x_vals)

## Calculate Current Exploration Bias

## Initialize Driver

## Build GP Emulator Class

## Generate, Shuffle, and Split Training Data

## Set GP Model

## Train GP Model

## Evaluate GP Model Mean and Variance

## Evaluate GP SSE and SSE_var

## Calculate Best Error

## Calculate EI

## Optimize EI over theta

## Augmenting Training Data (Skipped Steps)

# Case Study GPBO Driver Test

In [4]:
cs_name  = CS_name_enum(1)
name_cs_str = cs_name.name
print("CS Name: ", name_cs_str)

# indecies_to_consider = list(range(0, 8)) #This is what changes for different subproblems of CS2
indecies_to_consider = list(range(0, 2)) #This is what changes for different subproblems of CS1

ep0 = 1
ep_enum = Ep_enum(1)
sep_fact = 0.05
normalize = False
gen_heat_map_data = True
noise_mean = 0
noise_std = 0.01
# noise_std = 0.0
kernel = Kernel_enum(1)
lenscl = None
outputscl = 1
retrain_GP = 0
GP_train_iter = 300
bo_iter_tot = 5
bo_run_tot = 2
save_data = False
num_data = None
seed = 1
ei_tol = 1e-6
obj_tol = 1e-4

num_x_data = 5
gen_meth_x = Gen_meth_enum(2)
num_theta_data = 20
gen_meth_theta = Gen_meth_enum(1)

#Set Date and Time
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S)")
DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M")
DateTime = None ##For Testing

CS Name:  CS1


In [5]:
#Set Cs_params and Simulator
cs_params = CaseStudyParameters(cs_name, ep0, sep_fact, normalize, gen_heat_map_data, bo_iter_tot, bo_run_tot, 
                                save_data, DateTime, seed, ei_tol, obj_tol)
simulator = simulator_helper_test_fxns(cs_name, indecies_to_consider, noise_mean, noise_std, cs_params)

#Set Method
meth_name = Method_name_enum(5)
method = GPBO_Methods(meth_name)
print(method.method_name.name)

#Generate Exp Data
num_x_data = 5
gen_meth_x = Gen_meth_enum(2) #Note: Has to be the same for validation and sim data
exp_data = simulator.gen_exp_data(num_x_data, gen_meth_x)

#Generate Sim Data
num_theta_data = 20
gen_meth_theta = Gen_meth_enum(1)
sim_data = simulator.gen_sim_data(num_theta_data, num_x_data, gen_meth_theta, gen_meth_x)
sim_sse_data = simulator.sim_data_to_sse_sim_data(method, sim_data, exp_data)

#Generate Validation Data
num_theta_data_val = 10
gen_meth_theta_val = Gen_meth_enum(1)
val_data = simulator.gen_sim_data(num_theta_data_val, num_x_data, gen_meth_theta_val, gen_meth_x, True)
val_sse_data = simulator.sim_data_to_sse_sim_data(method, val_data, exp_data, True)

#Initialize Driver
ep_bias = Exploration_Bias(ep0, None, ep_enum, None, None, None, None, None, None, None)
driver = GPBO_Driver(cs_params, method, simulator, exp_data, sim_data, sim_sse_data, val_data, val_sse_data, None, ep_bias)

C2


In [6]:
#Run Bo iterations/restarts
reoptimize = 10
restart_bo_results = driver.run_bo_restarts(kernel, lenscl, outputscl, retrain_GP, reoptimize)



In [7]:
driver.save_data(restart_bo_results)

In [8]:
fileObj = open("No_Date/Data_Files/" + name_cs_str + ".pickle", 'rb')
loaded_results = pickle.load(fileObj)
fileObj.close()

print(loaded_results[0].configuration)
# print(loaded_results[0].list_gp_emulator_class[0])
# print(len(loaded_results[0].list_heat_map_data))
print(loaded_results[0].results_df)
print(loaded_results[0].results_df["Theta Min Obj Cum."].iloc[-1])
print(loaded_results[0].simulator_class.theta_true)
# print(loaded_results[0].heat_map_data_dict)
# print(loaded_results[0].simulator_class.theta_true_norm)

{'DateTime String': None, 'Method Name Enum Value': 5, 'Case Study Name': 'CS1', 'Exploration Bias Method Value': 1, 'Separation Factor': 0.05, 'Normalize': False, 'Heat Map Points Generated': True, 'Max BO Iters': 5, 'Number of Workflow Restarts': 2, 'Seed': 3, 'EI Tolerance': 0.0001, 'Obj Improvement Tolerance': 1e-06}
   index Best Error Exploration Bias     Max EI  \
0      0  95.996126                1        0.0   
1      0  95.996126                1  279.36106   
2      0  74.671073                1        0.0   
3      0  74.671073                1        0.0   

                                 Theta Max EI    Min Obj  \
0   [-1.424906265112171, 0.33997509467786546]  96.000542   
1  [-1.7253415927912925, -0.8920505627722609]  79.574663   
2    [0.2953551463002735, 1.0806035426343508]  74.668732   
3    [0.2953551463002735, 1.0806035426343508]  73.072422   

                                Theta Min Obj Min Obj Cum.  \
0  [-1.8970380902170731, -1.5961473012014646]    96.000542

## Plot BO Result Data

In [9]:
from bo_methods_lib.GPBO_Classes_plotters import *
runs = len(loaded_results)
dim_hps = len(loaded_results[0].list_gp_emulator_class[0].trained_hyperparams[0]) + 2
num_hp_sets = loaded_results[0].configuration["Max BO Iters"]
hps = np.zeros((runs, num_hp_sets, dim_hps))
hp_names = [f"\\ell_{i}" for i in range(1, dim_hps+1)]
hp_names[-2] = "\sigma"
hp_names[-1] = "\\tau"
data_true = None

for j in range(runs):
    run = loaded_results[j]
    for i in range(len(run.list_gp_emulator_class)):
       # Extract the array and convert other elements to float
        array_part = run.list_gp_emulator_class[i].trained_hyperparams[0]
        rest_part = np.array(run.list_gp_emulator_class[i].trained_hyperparams[1:], dtype=float)
        hp = np.concatenate([array_part, rest_part])
        # Create the resulting array of shape (1, 10)
        hps[j,i,:] = hp

In [10]:
xbins = 5
ybins = 7
title_fontsize = 24
other_fontsize = 20
save_path = None
x_label = "BO Iterations"
y_label = "Hyperparameters"
title = "Hyperparameter Values"
# plot_2D_Data(hps, hp_names, data_true, xbins, ybins, title, x_label, y_label, title_fontsize, other_fontsize, save_path)

In [11]:
runs = len(loaded_results)
dim_data = 3 #obj, min obj, and ei
num_sets = loaded_results[0].configuration["Max BO Iters"]
data = np.zeros((runs, num_sets, dim_data))
data_names = ["log(sse)", "min(log(sse))", "EI"]
data_true = None
for j in range(runs):
    run = loaded_results[j]
    # Extract the array and convert other elements to float
    for i in range(len(run.results_df["Min Obj"])):
        log_sse = np.log(run.results_df["Min Obj"].to_numpy().astype(float)[i])
        log_min_sse = np.log(run.results_df["Min Obj Cum."].to_numpy().astype(float)[i])
        max_ei = run.results_df["Max EI"].to_numpy().astype(float)[i]
        # Create the resulting array of shape (1, 10)
        data[j,i,0] = log_sse
        data[j,i,1] = log_min_sse
        data[j,i,2] = max_ei

In [12]:
xbins = 5
ybins = 7
title_fontsize = 24
other_fontsize = 20
save_path = None
x_label = "BO Iterations"
y_label = "Data"
title = "Data Values"
# plot_2D_Data(data, data_names, data_true, xbins, ybins, title, x_label, y_label, title_fontsize, other_fontsize, save_path)

In [13]:
runs = len(loaded_results)
dim_data = len(loaded_results[0].results_df["Theta Min Obj"].to_numpy()[0]) #len theta best

data = np.zeros((runs, num_sets, dim_data))
data_names = [f"\\theta_{i}" for i in range(1, dim_data+1)]
data_true = loaded_results[0].simulator_class.theta_true
for j in range(runs):
    run = loaded_results[j]
    num_sets = len(run.results_df["Theta Min Obj"])
    for i in range(num_sets):
        # Extract the array and convert other elements to float
        theta_min_obj = run.results_df["Theta Min Obj"].to_numpy()[i]
        # Create the resulting array of shape (1, 10)
        data[j,i,:] = theta_min_obj

In [14]:
xbins = 5
ybins = 5
title_fontsize = 24
other_fontsize = 20
save_path = None
x_label = "BO Iterations"
y_label = "Theta Values"
title = "Min Obj Parameter Values"
# plot_2D_Data(data, data_names, data_true, xbins, ybins, title, x_label, y_label, title_fontsize, other_fontsize, save_path)

In [15]:
##FIX ME
runs = len(loaded_results)
dim_data = len(loaded_results[0].results_df["Theta Min Obj Cum."].to_numpy()[0]) #len theta best
data = np.zeros((runs, num_sets, dim_data))
data_names = [f"\\theta_{i}" for i in range(1, dim_data+1)]
data_true = loaded_results[0].simulator_class.theta_true

print(data.shape)

for j in range(runs):
    run = loaded_results[j]
    num_sets = len(run.results_df["Theta Min Obj Cum."])
    for i in range(num_sets):
        # Extract the array and convert other elements to float
        theta_min_obj = run.results_df["Theta Min Obj Cum."].to_numpy()[i]
        print(theta_min_obj)
        # Create the resulting array of shape (1, 10)
        data[j,i,:] = theta_min_obj
print(data.shape)

(2, 2, 2)
[-1.89703809 -1.5961473 ]
[-1.72212911 -0.87887871]
[-1.72363501 -0.88504721]


IndexError: index 2 is out of bounds for axis 1 with size 2

In [None]:
xbins = 5
ybins = 5
title_fontsize = 24
other_fontsize = 20
save_path = None
x_label = "BO Iterations"
y_label = "Theta Values"
title = "Min Obj Parameter Values Overall"
# plot_2D_Data(data, data_names, data_true, xbins, ybins, title, x_label, y_label, title_fontsize, other_fontsize, save_path)

In [None]:
runs = len(loaded_results)
dim_data = len(loaded_results[0].results_df["Theta Max EI"].to_numpy()[0]) #len theta best
data = np.zeros((runs, num_sets, dim_data))
data_names = [f"\\theta_{i}" for i in range(1, dim_data+1)]
data_true = loaded_results[0].simulator_class.theta_true
for j in range(runs):
    run = loaded_results[j]
    num_sets = len(run.results_df["Theta Max EI"])
    for i in range(num_sets):
        # Extract the array and convert other elements to float
        theta_min_obj = run.results_df["Theta Max EI"].to_numpy()[i]
        # Create the resulting array of shape (1, 10)
        data[j,i,:] = theta_min_obj

In [None]:
xbins = 5
ybins = 5
title_fontsize = 24
other_fontsize = 20
save_path = None
x_label = "BO Iterations"
y_label = "Theta Values"
title = "Max EI Parameter Values"
# plot_2D_Data(data, data_names, data_true, xbins, ybins, title, x_label, y_label, title_fontsize, other_fontsize, save_path)

In [None]:
runs = len(loaded_results)
x_exp = loaded_results[0].simulator_class.gen_exp_data(num_x_data, gen_meth_x).x_vals
dim_data = loaded_results[0].list_gp_emulator_class[0].get_dim_gp_data() #dim training data
data_true = loaded_results[0].simulator_class.theta_true
param_names = [f"\\theta_{i}" for i in range(1, len(data_true)+1)]
x_names = [f"Xexp_{i}" for i in range(1, x_exp.shape[1]+1)]
data_names = param_names+x_names
train_data = loaded_results[0].list_gp_emulator_class[0].feature_train_data
test_data = loaded_results[0].list_gp_emulator_class[0].feature_test_data
val_data = loaded_results[0].list_gp_emulator_class[0].feature_val_data

In [None]:
xbins = 5
ybins = 5
zbins = 5

idcs_to_plot = [0,1,2]
title_fontsize = 24
other_fontsize = 20
save_path = None
title = "Data Run 1 BO Iter 1"

data_true = loaded_results[0].simulator_class.theta_true
# plot_train_test_val_data(train_data, test_data, val_data, data_names, idcs_to_plot, x_exp, xbins, ybins, zbins, 
#                          title, title_fontsize, other_fontsize, save_path)

## Plot Heat Maps

In [None]:
fileObj = open("No_Date/Data_Files/CS1.pickle", 'rb')
loaded_results = pickle.load(fileObj)
fileObj.close()

print(loaded_results[0].configuration)
print(loaded_results[0].results_df)

In [None]:
#Create Heat Map Data for a run 1 iter 1
run = 0
bo_iter = 0
pair = "theta_1-theta_2"

#Regeneate simulator, gp_emulator, exerimental data, best error, true theta, lowest obj theta, and highest ei theta
gp_emulator = loaded_results[run].list_gp_emulator_class[bo_iter]
heat_map_data_dict = loaded_results[run].heat_map_data_dict
heat_map_data = heat_map_data_dict[pair]
featurized_hm_data = gp_emulator.featurize_data(heat_map_data)
simulator = loaded_results[run].simulator_class
param_names = pair.split("-")
#Get index of param set
idcs_to_plot = [loaded_results[run].simulator_class.theta_true_names.index(name) for name in param_names]
exp_data = loaded_results[run].exp_data_class
best_error =  loaded_results[run].results_df["Best Error"].iloc[bo_iter]
theta_true = loaded_results[run].simulator_class.theta_true
theta_opt =  loaded_results[run].results_df["Theta Min Obj Cum."].iloc[bo_iter]
theta_next = loaded_results[run].results_df["Theta Max EI"].iloc[bo_iter]
train_theta = loaded_results[run].list_gp_emulator_class[bo_iter].train_data.theta_vals
enum_method = loaded_results[run].configuration["Method Name Enum Value"]
enum_ep = Ep_enum(loaded_results[run].configuration["Exploration Bias Method Value"])
ep_at_iter = loaded_results[run].results_df["Exploration Bias"].iloc[bo_iter]
meth_name = Method_name_enum(enum_method)
method = GPBO_Methods(meth_name)
ep_bias = Exploration_Bias(None, ep_at_iter, enum_ep, None, None, None, None, None, None, None)

print(len(heat_map_data.theta_vals))
#Calculate GP mean and var for heat map data
heat_map_data.gp_mean, heat_map_data.gp_var = gp_emulator.eval_gp_mean_var_heat_map(heat_map_data, featurized_hm_data)

#Calculate SSE, SSE var, and EI
if method.emulator == False:
    heat_map_data.sse_mean, heat_map_data.sse_var = gp_emulator.eval_gp_sse_var_heat_map(heat_map_data)
else:
    heat_map_data.sse_mean, heat_map_data.sse_var = gp_emulator.eval_gp_sse_var_heat_map(heat_map_data, exp_data)
    
if method.emulator == False:
    heat_map_data.ei = gp_emulator.eval_ei_heat_map(heat_map_data, exp_data, ep_bias, best_error)
else:
    heat_map_data.ei = gp_emulator.eval_ei_heat_map(heat_map_data, exp_data, ep_bias, best_error, method)

In [None]:
xbins = 5
ybins = 5
zbins = 900

vals_to_plot = [0, 1, 2]
levels = [100,100,100]
title_fontsize = 24
other_fontsize = 20
save_path = None
plot_axis_names = param_names
title = "Heat Map Pair"

# plot_heat_maps(heat_map_data, theta_true, theta_opt, theta_next, train_theta, plot_axis_names, levels, idcs_to_plot, vals_to_plot, xbins, 
#                ybins, zbins, title, title_fontsize, other_fontsize, save_path)