In [39]:
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from matplotlib import pyplot as plt
import sys

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

from bo_functions_generic import gen_theta_set

from CS2_bo_plotters import plot_obj_abs_min
from CS2_bo_plotters import value_plotter
from CS2_bo_plotters import plot_obj
from CS2_bo_plotters import plot_Theta
from CS2_bo_plotters import plot_Theta_min
from CS2_bo_plotters import path_name
from CS2_bo_functions_multi_dim import set_ep
from CS2_bo_plotters import plot_org_train, plot_EI_abs_max

In [40]:
#https://numpy.org/doc/stable/reference/generated/numpy.loadtxt.html

def csv_to_array(csv_path):
    """
    Turn a csv file into a numpy array so that it can be used with bo_plotters
    
    Parameters:
    -----------
        csv_path: str, path of csv you want to turn into an array
    Returns:
    --------
        csv_array: np.ndarray, array on values in the CSV file
    """
    
    csv_pd = pd.read_csv(csv_path, index_col = False)
    csv_pd.drop(columns=csv_pd.columns[0], 
        axis=1, 
        inplace=True)
    csv_array = csv_pd.to_numpy()
    
    return csv_array

In [41]:
%%time

#Set Date and Time
DateTime = "2022/10/21/13-18" #Date and Time - Dispayed in job file
# DateTime = None ##For Testing

#Set Parameters
exp_d = 1
n = 5
t = 20
tot_iter = bo_iters= 100 #Total Number of iters - Dispayed in job file
tot_runs = 15 #Total Number of runs - Dispayed in job file
sep_fact = np.linspace(0.1,1,1)
set_lengthscale = None
is_figure = False
save_figure = False
save_CSV = False


param_dict = {0 : '\\theta_1', 1 : '\\theta_2'}
indecies = [list(param_dict.keys())[list(param_dict.values()).index('\\theta_1')],
            list(param_dict.keys())[list(param_dict.values()).index('\\theta_2')]]

Theta_True = np.array([1,-1])

bounds = np.array([[-2, -2],
                   [ 2,  2]])

param_names_list = [param_dict[0], param_dict[1]] #Fill in depending on problem
mesh_combo = None

d = len(Theta_True)

obj = np.array(["obj","LN_obj"])
emulator = np.array([False, True])
sparse_grid = np.array([False,True])

#Pull Experimental data from CSV
exp_data_doc = 'Input_CSVs/Exp_Data/d='+str(exp_d)+'/n='+str(n)+'.csv'
exp_data = np.array(pd.read_csv(exp_data_doc, header=0,sep=","))
Xexp = exp_data[:,1:exp_d+1]
Yexp = exp_data[:,-1]

Xexp = exp_data[:,1]
Yexp = exp_data[:,2]
n = len(Xexp)
# print(len(Xexp) == len(Yexp))

#Define GP Testing space
p=20
theta_mesh = gen_theta_set(LHS = False, n_points = p, dimensions = d, bounds = bounds)

df_list = []
print("Runs:", tot_runs)
print("BO Iterations:",tot_iter)
print("%%%%%%%%%%%%%%%%%%%%%%%%%%")
count = 0
for emul in emulator: 
    sys.stdout.flush()
    obj_use = obj
    print("-------------------")
    print("Emulator?:", emul)
    if emul == True:
        t = n*t
        sparse_grid_use = sparse_grid
    else:
        sparse_grid_use = np.array([sparse_grid[0]]) #Sparse Grid will always be False for 2-Input
        
    for sparse in sparse_grid_use:
#         #Can set ep to 1 for sparse grid if wanted
        if sparse == True:
            obj_use =  np.array(["obj"])
        else:
            obj_use =  obj
#             ep_use = torch.tensor([1]) 
#         else:
#             ep_use = explore_bias
        print("______________________________")
        print("Sparse Grid?:", sparse)  
        
        for obj_func in obj_use:
            
            print("Objective Function:", obj_func)
            print("-  -  -  -  -  -  -  -  -  -  -")

            ep = set_ep(emul, obj_func, sparse)
            csv_end = ".csv" #For obj
            fxn = "plot_obj"

            median_obj =[]
            sep_fact_list = np.linspace(0.1,1,10)
            for k in range(len(sep_fact_list)):  
                path = path_name(emul, ep, sparse, fxn, set_lengthscale, t, obj_func, mesh_combo = None, bo_iter= None, 
                      title_save = None, run = None, tot_iter=tot_iter, tot_runs=tot_runs, DateTime = DateTime, 
                             sep_fact = sep_fact_list[k], is_figure = is_figure, csv_end = ".csv")
                # print(path)
                obj_array = csv_to_array(path)
                all_obj_list = []
                for i in range(tot_runs): 
                    #Plot data
                    obj_df_run = pd.DataFrame(data = obj_array[i])
                    obj_df_i = obj_df_run.loc[(abs(obj_df_run) > 1e-6).any(axis=1),0]
                    obj_no_zero_arr = list(np.array(obj_df_i).flatten())
                    all_obj_list += obj_no_zero_arr
                median_obj.append(np.median(all_obj_list))

            median_obj = np.array(np.exp(median_obj))

            med_iter_list = []
            sse_best_list = []
            iter_best_num_list = []
            iter_median_num_list = []
            run_best_num_list = []
            for k in range(len(sep_fact_list)):  
                csv_end = ".csv" #For obj
                fxn = "plot_obj"

                path = path_name(emul, ep, sparse, fxn, set_lengthscale, t, obj_func, mesh_combo = None, bo_iter= None, 
                          title_save = None, run = None, tot_iter=tot_iter, tot_runs=tot_runs, DateTime = DateTime, 
                                 sep_fact = sep_fact_list[k], is_figure = is_figure, csv_end = ".csv")
                # print(path)
                obj_abs_min = csv_to_array(path)

                iter_max = []
                run_best = []
                iter_best = []
                sse_best = []
                for i in range(tot_runs):
                    obj_min_df_run = pd.DataFrame(data = obj_abs_min[i])
                    obj_min_df_i = obj_min_df_run.loc[(abs(obj_min_df_run) > 1e-6).any(axis=1),0]
                    sse_best.append(np.amin(obj_min_df_i))
                    iter_max.append(len(obj_min_df_i))
                    min_iter = np.where(np.amin(obj_min_df_i) == obj_min_df_i) 
                    iter_best.append(min_iter[0][0])
                    run_best.append(i)
            #     print(iter_max)    
                iter_median_num_list.append(np.median(iter_max))
                iter_best_act = np.array(iter_best) + 1
                run_best_act = np.array(run_best) + 1 
                sse_best_num = np.amin(sse_best)
                sse_best_list.append(np.exp(sse_best_num))
                iter_best_ind = sse_best.index(sse_best_num)
                run_best_num = run_best_act[iter_best_ind]
                iter_best_num = iter_best_act[iter_best_ind]
                median_iters = np.median(iter_best_act)
                med_iter_list.append(median_iters)
                iter_best_num_list.append(iter_best_num) 
                run_best_num_list.append(run_best_num)

#             print(med_iter_list)
#             print(sse_best_list)
#             print(iter_best_num_list)
#             print(iter_median_num_list)
#             print(run_best_num_list)
            theta_Opt = np.zeros((len(sep_fact_list),d))
            for k in range(len(sep_fact_list)):
                Theta_array = []
                fxn = "plot_Theta"
                for z in range(len(param_dict.keys())):
                    csv_end = "/"+param_dict[z]+ ".csv" #For Thetas #Need to ensure this saves correctly

                    path_i = path_name(emul, ep, sparse, fxn, set_lengthscale, t, obj_func, mesh_combo = None, bo_iter= None, 
                              title_save = None, run = None, tot_iter=tot_iter, tot_runs=tot_runs, DateTime = DateTime, 
                                     sep_fact = sep_fact_list[k], is_figure = is_figure, csv_end = csv_end)
                    path_i = path_i.replace("Param_Conv/\\theta_"+str(z+1), "Theta_Conv/Theta_Conv_"+str(z+1))
            #         print(path_i)
                    Theta_array.append( csv_to_array(path_i) )

                Theta_array = np.array(Theta_array)
                Theat_Opt = Theta_array[:, iter_best_num_list[0]-1, run_best_num_list[0]-1]
                theta_Opt[k] = Theat_Opt

            df = pd.DataFrame(list(zip(sep_fact_list, theta_Opt[:,0], theta_Opt[:,1], sse_best_list, median_obj,iter_best_num_list,iter_median_num_list)),
               columns =['Separation Factor', 'Theta 1', 'Theta 2', 'Best SSE', "Median SSE", "Best Eval #", "Median Eval #"])
            df_list.append(df)




Runs: 15
BO Iterations: 100
%%%%%%%%%%%%%%%%%%%%%%%%%%
-------------------
Emulator?: False
______________________________
Sparse Grid?: False
Objective Function: obj
-  -  -  -  -  -  -  -  -  -  -
Objective Function: LN_obj
-  -  -  -  -  -  -  -  -  -  -
-------------------
Emulator?: True
______________________________
Sparse Grid?: False
Objective Function: obj
-  -  -  -  -  -  -  -  -  -  -
Objective Function: LN_obj
-  -  -  -  -  -  -  -  -  -  -
______________________________
Sparse Grid?: True
Objective Function: obj
-  -  -  -  -  -  -  -  -  -  -
CPU times: user 1.33 s, sys: 35.6 ms, total: 1.37 s
Wall time: 1.37 s


In [42]:
method_names = ["1A", "1B", "2A", "2B", "2C"] 
method_times = [0.0547, 0.100, 0.392, 4.36, 0.615] 
for i in range(len(df_list)):
    df_list[i].loc[:,"Time/Iter (Minutes)"] = method_times[i]
    df_list[i].loc[:,"Best Time"] = df_list[i].loc[:,"Best Eval #"]*df_list[i].loc[:,"Time/Iter (Minutes)"]
    df_list[i].loc[:,"Median Time"] = df_list[i].loc[:,"Median Eval #"]*df_list[i].loc[:,"Time/Iter (Minutes)"]
    df_list[i].loc[:,"Method"] = method_names[i]
    cols = df_list[i].columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df_list[i] = df_list[i][cols]
#     print(df_list[i].head())

  Method  Separation Factor   Theta 1   Theta 2  Best SSE  Median SSE  \
0     1A                0.1  0.994778 -1.004316  0.000068    0.933479   
1     1A                0.2  0.347286 -0.942790  0.499692    2.919172   
2     1A                0.3  0.948718 -0.904418  0.021097   15.029638   
3     1A                0.4  0.000000  0.000000  4.305801   10.886895   
4     1A                0.5  0.000000  0.000000  4.943125   11.712681   

   Best Eval #  Median Eval #  Time/Iter (Minutes)  Best Time  Median Time  
0           45          100.0               0.0547     2.4615       5.4700  
1            8            4.0               0.0547     0.4376       0.2188  
2           28            3.0               0.0547     1.5316       0.1641  
3          100            5.0               0.0547     5.4700       0.2735  
4          100            6.0               0.0547     5.4700       0.3282  
  Method  Separation Factor   Theta 1   Theta 2  Best SSE  Median SSE  \
0     1B                0.

In [59]:
zero_data = np.zeros(shape=(len(method_names),4))
df_median_all = pd.DataFrame(zero_data, columns = ["Method", "Median SSE", "Median Eval #", "Median Time"])

for i in range(len(df_list)):
    df_median_all["Method"][i] = method_names[i]
    df_median_all["Median SSE"][i] = np.median(df_list[i].loc[:,"Median SSE"])
    df_median_all["Median Eval #"][i] = np.median(df_list[i].loc[:,"Median Eval #"])
    df_median_all["Median Time"][i] = np.median(df_list[i].loc[:,"Median Time"])

print(df_median_all)
df_median_all.to_csv("CS1_median_results.csv")
    

  Method  Median SSE  Median Eval #  Median Time
0     1A   11.817013           59.5      3.25465
1     1B    0.005699          100.0     10.00000
2     2A    0.002943          100.0     39.20000
3     2B    0.002911          100.0    436.00000
4     2C    0.260314            3.5      2.15250


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_median_all["Median SSE"][i] = np.median(df_list[i].loc[:,"Median SSE"])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_median_all["Median Eval #"][i] = np.median(df_list[i].loc[:,"Median Eval #"])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_median_all["Median Time"][i] = np.median(df_list[i].loc[:,"Median Time"])


In [35]:
df_all = pd.concat(df_list)
# df_all.to_csv("CS1_results.csv")
print(df_all.head())

  Method  Separation Factor   Theta 1   Theta 2  Best SSE  Median SSE  \
0     1A                0.1  0.994778 -1.004316  0.000068    0.933479   
1     1A                0.2  0.347286 -0.942790  0.499692    2.919172   
2     1A                0.3  0.948718 -0.904418  0.021097   15.029638   
3     1A                0.4  0.000000  0.000000  4.305801   10.886895   
4     1A                0.5  0.000000  0.000000  4.943125   11.712681   

   Best Eval #  Median Eval #  Time/Iter (Minutes)  Best Time  Median Time  
0           45          100.0               0.0547     2.4615       5.4700  
1            8            4.0               0.0547     0.4376       0.2188  
2           28            3.0               0.0547     1.5316       0.1641  
3          100            5.0               0.0547     5.4700       0.2735  
4          100            6.0               0.0547     5.4700       0.3282  


In [None]:
#Calc Overall Medians for each

In [38]:
# Not sure how to sort this the way I want but it's all here
df_all_sorted = df_all.sort_values(by=['Separation Factor', 'Method'], ascending=True)
print(df_all_sorted.tail())

  Method  Separation Factor   Theta 1   Theta 2   Best SSE  Median SSE  \
9     1A                1.0  1.042000 -1.577675  11.002158   11.198060   
9     1B                1.0  1.010287 -1.013514   0.000030    0.007396   
9     2A                1.0  0.991894 -0.993385   0.002554    0.003201   
9     2B                1.0  0.991585 -0.993331   0.002426    0.002770   
9     2C                1.0  0.856095 -0.980318   0.002225    0.012906   

   Best Eval #  Median Eval #  Time/Iter (Minutes)  Best Time  Median Time  
9          100          100.0               0.0547       5.47        5.470  
9          100          100.0               0.1000      10.00       10.000  
9           60          100.0               0.3920      23.52       39.200  
9           98          100.0               4.3600     427.28      436.000  
9            4            5.0               0.6150       2.46        3.075  
