## Import Dependencies

In [1]:
%%time

import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
import itertools
from itertools import combinations_with_replacement, combinations, permutations

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set, clean_1D_arrays
from bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.GPBO_Class_fxns import * #Fix this later

  from .autonotebook import tqdm as notebook_tqdm


CPU times: user 2.38 s, sys: 294 ms, total: 2.67 s
Wall time: 2.8 s


## Define Case Study & Most of Method

In [2]:
# ---------------------------------------------------------------------------
# Run timestamp
# ---------------------------------------------------------------------------
# DateTime is forwarded to CaseStudyParameters below. A formatted timestamp is
# built and then immediately replaced by None for test runs; comment out the
# override line to pass the real timestamp instead.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S)")
DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M")
DateTime = None ##For Testing

# ---------------------------------------------------------------------------
# Case study and model coefficients
# ---------------------------------------------------------------------------
cs_name = CS_name_enum(2)

# All 24 coefficients of the true model as one flat array.
# Row labels assume entry i belongs to param_dict[i] -- TODO confirm.
true_model_coefficients = np.array([
    -200, -100, -170, 15,     # A_1  .. A_4
    -1, -1, -6.5, 0.7,        # a_1  .. a_4
    0, 0, 11, 0.6,            # b_1  .. b_4
    -10, -10, -6.5, 0.7,      # c_1  .. c_4
    1, 0, -0.5, -1,           # x0_1 .. x0_4
    0, 0.5, 1.5, 1,           # x1_1 .. x1_4
])

# Integer index -> parameter name, four names per prefix (keys 0..23).
param_dict = dict(enumerate(
    f"{prefix}_{number}"
    for prefix in ("A", "a", "b", "c", "x0", "x1")
    for number in range(1, 5)
))

# Keys 4..11 (a_1..a_4 and b_1..b_4). This slice is what changes between the
# different subproblems of CS2.
indecies_to_consider = list(param_dict)[4:12]

# Left as None here; filled in later from simulator.set_true_params(CS).
true_params = None

# ---------------------------------------------------------------------------
# Scalar settings (all independent literals)
# ---------------------------------------------------------------------------
# Data sizes and generation
dim_x = 2
num_x_data = 27
num_theta_data = 20
num_data = None
lhs_gen_theta = True
eval_all_pairs = False
normalize = False
noise_mean = 0
noise_std = 0.01
seed = 1

# GP model settings
package = "scikit_learn"
kernel = "Mat_52"
set_lenscl = 1
outputscl = False
retrain_GP = 2
GP_train_iter = 300

# BO loop settings
ep0 = 1
sep_fact = 1.0
bo_iter_tot = 3
bo_run_tot = 2

# Output settings
save_fig = False
save_data = False

# The model function is attached to the simulator in a later cell.
calc_y_fxn = None

# ---------------------------------------------------------------------------
# Build the case study, simulator and method objects
# ---------------------------------------------------------------------------
# Arguments are positional; the order below must not change.
CS = CaseStudyParameters(
    cs_name, true_params, true_model_coefficients, param_dict,
    ep0, sep_fact, normalize, eval_all_pairs, package,
    noise_mean, noise_std, kernel, set_lenscl, outputscl,
    retrain_GP, GP_train_iter, bo_iter_tot, bo_run_tot,
    save_fig, save_data, DateTime, seed,
)

simulator = Simulator(dim_x, indecies_to_consider, lhs_gen_theta, calc_y_fxn)

# The method is created with three placeholder Nones, then each derived
# attribute is filled in from its own getter.
meth_name = Method_name_enum(5)
method = GPBO_Methods(meth_name, None, None, None)
method.emulator = method.get_emulator()
method.obj = method.get_obj()
method.sparse_grid = method.get_sparse_grid()

# Summary of the chosen configuration.
for label, value in [
    ("CS Name: ", cs_name.name),
    ("Method Name: ", method.method_name.name),
    ("Emulator?: ", method.emulator),
    ("Obj Func: ", method.obj.name),
    ("Sparse Grid?: ", method.sparse_grid),
]:
    print(label, value)

CS Name:  CS2
Method Name:  C2
Emulator?:  True
Obj Func:  OBJ
Sparse Grid?:  True


## Set True Parameters (Theta) and Calc Model (based on Case Study)

In [3]:
# Ask the simulator for the true parameter values and their names, then store
# the values on the case study object (it was created with true_params=None).
true_params, true_param_names = simulator.set_true_params(CS)
CS.true_params = true_params

# Look up the model function for this case study and attach it to the
# simulator; the function is also kept under calc_model_fxn.
simulator.calc_y_fxn = calc_model_fxn = simulator.set_calc_model(cs_name)

## Find Experimental X Data (If not given)

In [4]:
driver = GPBO_Driver(CS, simulator)

# Bounds on the two x dimensions.
# Presumably row 0 = lower bounds and row 1 = upper bounds -- TODO confirm.
bounds_x = np.array([
    [-1.5, -0.5],
    [1, 2],
])
gen_meth = Gen_meth_enum(1)
seed = 1

# Generate num_x_data x points within bounds_x, then pass the result through
# clean_1D_arrays.
x_data = driver.create_param_data(num_x_data, bounds_x, gen_meth)
x_data = clean_1D_arrays(x_data)

# Data takes nine positional arguments; only the 2nd (true_params) and
# 3rd (x_data) are supplied here, every other slot is None.
exp_data = Data(None, true_params, x_data, *([None] * 6))

## Normalize Experimental X Data (optional)

In [5]:
# Bounds for the eight parameters under study.
bounds_p = np.array([
    [-2, -2, -10, -2, -2, -2,  5, -2],   # lower bounds
    [ 2,  2,   0,  2,  2,  2, 15,  2],   # upper bounds
])

# Separate Data object holding normalized copies; exp_data itself is untouched.
# Only the 2nd (true_params) and 3rd (x_data) positional slots are filled.
scaled_exp_data = Data(None, true_params, x_data, *([None] * 6))

# Normalize each quantity against its own bounds.
scaled_exp_data.theta_true = scaled_exp_data.normalize(true_params, bounds_p)
scaled_exp_data.x_data = scaled_exp_data.normalize(x_data, bounds_x)

## Generate Experimental y Data

In [6]:
# Open question from the original author: what is the best way to tell case
# studies apart (by name?) and how should they be named?
# The experimental y values come from the driver and are stored both on
# exp_data and in the stand-alone variable y_exp.
exp_data.y_vals = y_exp = driver.create_y_exp_data(exp_data)

## Generate Simulation Theta Data

In [7]:
# Parameters are selected through indices and dictionaries rather than a
# skip/param_type flag.

# Lower (row 0) and upper (row 1) bounds of the eight parameters under study.
bounds_p = np.array([
    [-2, -2, -10, -2, -2, -2,  5, -2],
    [ 2,  2,   0,  2,  2,  2, 15,  2],
])

# Container for the simulation data; theta values are attached right after.
# Only the 2nd (true_params) and 3rd (x_data) positional slots are filled.
sim_data = Data(None, true_params, x_data, *([None] * 6))

# Generate num_theta_data parameter sets within bounds_p and clean them up.
theta_candidates = driver.create_param_data(num_theta_data, bounds_p, gen_meth)
sim_data.theta_vals = clean_1D_arrays(theta_candidates)

# Size and dimension of the GP training data for the chosen method, plus the
# number of theta sets.
num_gp_data = sim_data.get_num_gp_data(method)
dim_gp_data = sim_data.get_dim_gp_data(method)
num_theta_calc = sim_data.get_num_theta()

print(num_gp_data, dim_gp_data, num_theta_calc)

540 10 20


## Generate y_sim, sse, or log(sse) data (Run Simulations)

In [8]:
# Run the simulator for the chosen method and case study. The result is stored
# on sim_data and also kept in the stand-alone variable y_sim.
sim_data.y_vals = y_sim = simulator.create_sim_data(method, CS, sim_data, exp_data)

## Augmenting Training Data (Skipped Steps)

In [9]:
# Wrap the true parameters as a single-row theta set (reshaped to 1 x n) so
# they can be simulated and appended like any other candidate point.
best_theta = Data(true_params.reshape(1, -1), true_params, x_data, *([None] * 6))
best_theta.y_vals = simulator.create_sim_data(method, CS, best_theta, exp_data)

# Sizes before augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

# Add the extra point to the training data; sim_data is rebound to the result.
sim_data = driver.augment_train_data(sim_data, best_theta)

# Sizes after augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

20 540
21 567


# Case Study 1 Test

In [10]:
# Configuration for the Case Study 1 test. Every setting is (re)defined in this
# cell so the CS1 section can be run on its own after the import cell.

#Set Date and Time
# DateTime is forwarded to CaseStudyParameters below; it is built and then
# replaced by None for test runs. Comment out the override to use the real one.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S)")
# print("Date and Time: ", timestampStr)
# DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M-%S%p")
DateTime = dateTimeObj.strftime("%Y/%m/%d/%H-%M")
DateTime = None ##For Testing

cs_name  = CS_name_enum(1)

# Coefficients of the true model and the index -> name map for its parameters.
true_model_coefficients = np.array([1, -1])
true_params = None  # filled in later from simulator.set_true_params(CS)
param_dict = {0 : 'theta_1', 1 : 'theta_2'}
# CS1 considers every parameter in param_dict (no subproblem slicing here).
indecies_to_consider = list(param_dict.keys())

# true_params = true_model_coefficients[indecies_to_consider]
# print([param_dict[key] if key in indecies_to_consider else f"No value found for key {key}" for key in indecies_to_consider])
ep0 = 1
sep_fact = 1.0
normalize = False
num_x_data = 5
# NOTE(review): x_data_vals below is a 1-D linspace and bounds_x in this
# section has a single column, so dim_x = 2 looks carried over from the CS2
# section. Left unchanged -- confirm what Simulator expects for CS1.
dim_x = 2
num_theta_data = 20
lhs_gen_theta = True
eval_all_pairs = False
package = "scikit_learn"
noise_mean = 0
noise_std = 0.01
kernel = "Mat_52"
set_lenscl = 1
outputscl = False
retrain_GP = 2
GP_train_iter = 300
bo_iter_tot = 3
bo_run_tot = 2
save_fig = False
save_data = False
# Experimental x locations: num_x_data evenly spaced points on [-2, 2].
x_data_vals = np.linspace(-2,2,num_x_data)
num_data = None
calc_y_fxn = None  # the model function is attached to the simulator later
seed = 1

# Arguments are positional; the order below must not change.
CS = CaseStudyParameters(cs_name, true_params, true_model_coefficients, param_dict, ep0, sep_fact, normalize,
                         eval_all_pairs, package, noise_mean, noise_std, kernel, set_lenscl, outputscl,
                         retrain_GP, GP_train_iter, bo_iter_tot, bo_run_tot, save_fig, save_data, DateTime, seed)

simulator = Simulator(dim_x, indecies_to_consider, lhs_gen_theta, calc_y_fxn)

#Generate Method (except last part)
# meth_name is set explicitly here (same value, 5, as in the CS2 section) so
# this cell no longer relies on a variable left over from an earlier section.
meth_name = Method_name_enum(5)
method = GPBO_Methods(meth_name, None, None, None)
method.emulator = method.get_emulator()
method.obj = method.get_obj()
method.sparse_grid = method.get_sparse_grid()

print("CS Name: ", cs_name.name)
print("Method Name: ", method.method_name.name)
print("Emulator?: ", method.emulator)
print("Obj Func: ", method.obj.name)
print("Sparse Grid?: ", method.sparse_grid)

CS Name:  CS1
Method Name:  C2
Emulator?:  True
Obj Func:  OBJ
Sparse Grid?:  True


In [11]:
# Ask the simulator for the true parameter values and their names, then store
# the values on the case study object (it was created with true_params=None).
true_params, true_param_names = simulator.set_true_params(CS)
CS.true_params = true_params

# Look up the model function for this case study and attach it to the
# simulator; the function is also kept under calc_model_fxn.
simulator.calc_y_fxn = calc_model_fxn = simulator.set_calc_model(cs_name)

In [12]:
driver = GPBO_Driver(CS, simulator)

# Bounds on the single x dimension.
# Presumably row 0 = lower bound and row 1 = upper bound -- TODO confirm.
bounds_x = np.array([
    [-2],
    [2],
])
gen_meth = Gen_meth_enum(2)

# The x locations were fixed in the configuration cell (x_data_vals), so
# nothing is generated here.
x_data = x_data_vals

# Data takes nine positional arguments; only the 2nd (true_params) and
# 3rd (x_data) are supplied here, every other slot is None.
exp_data = Data(None, true_params, x_data, *([None] * 6))

In [13]:
# CS1: bounds for the two parameters.
# Presumably row 0 = lower bounds and row 1 = upper bounds -- TODO confirm.
bounds_p = np.array([
    [-2, -2],
    [ 2,  2],
])

# Separate Data object holding normalized copies; exp_data itself is untouched.
# Only the 2nd (true_params) and 3rd (x_data) positional slots are filled.
scaled_exp_data = Data(None, true_params, x_data, *([None] * 6))

# Normalize each quantity against its own bounds.
scaled_exp_data.theta_true = scaled_exp_data.normalize(true_params, bounds_p)
scaled_exp_data.x_data = scaled_exp_data.normalize(x_data, bounds_x)

In [14]:
# Open question from the original author: what is the best way to tell case
# studies apart (by name?) and how should they be named?
# The experimental y values come from the driver and are stored both on
# exp_data and in the stand-alone variable y_exp.
exp_data.y_vals = y_exp = driver.create_y_exp_data(exp_data)

In [15]:
# Container for the simulation data; theta values are attached right after.
# Only the 2nd (true_params) and 3rd (x_data) positional slots are filled.
sim_data = Data(None, true_params, x_data, *([None] * 6))

# Generate parameter sets within bounds_p (bounds_p and gen_meth come from the
# preceding CS1 cells) and clean them up.
theta_candidates = driver.create_param_data(num_theta_data, bounds_p, gen_meth)
sim_data.theta_vals = clean_1D_arrays(theta_candidates)

# Size and dimension of the GP training data for the chosen method, plus the
# number of theta sets.
num_gp_data = sim_data.get_num_gp_data(method)
dim_gp_data = sim_data.get_dim_gp_data(method)
num_theta_calc = sim_data.get_num_theta()

print(num_gp_data, dim_gp_data, num_theta_calc)

2000 3 400


In [16]:
# Run the simulator for the chosen method and case study. The result is stored
# on sim_data and also kept in the stand-alone variable y_sim.
sim_data.y_vals = y_sim = simulator.create_sim_data(method, CS, sim_data, exp_data)

In [17]:
# Wrap the true parameters as a single-row theta set (reshaped to 1 x n) so
# they can be simulated and appended like any other candidate point.
best_theta = Data(true_params.reshape(1, -1), true_params, x_data, *([None] * 6))
best_theta.y_vals = simulator.create_sim_data(method, CS, best_theta, exp_data)

# Sizes before augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

# Add the extra point to the training data; sim_data is rebound to the result.
sim_data = driver.augment_train_data(sim_data, best_theta)

# Sizes after augmentation.
print(len(sim_data.theta_vals), len(sim_data.y_vals))

400 2000
401 2005
