In [1]:
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from matplotlib import pyplot as plt
import sys

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set

from bo_methods_lib.CS2_bo_plotters import plot_obj_abs_min, value_plotter, plot_obj, plot_Theta, plot_Theta_min,path_name, plot_org_train, save_fig, plot_EI_abs_max

from bo_methods_lib.CS2_bo_functions_multi_dim import set_ep

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#https://numpy.org/doc/stable/reference/generated/numpy.loadtxt.html

def csv_to_array(csv_path):
    """
    Read a CSV file and return everything except its first column as a numpy array.

    The original intent (per the notebook) is to feed saved CSV results to the
    bo_plotters helpers, which work on arrays rather than DataFrames.

    Parameters:
    -----------
        csv_path: str, path of the CSV file to convert
    Returns:
    --------
        csv_array: np.ndarray, values of every column after the first one
    """
    frame = pd.read_csv(csv_path, index_col=False)

    # The first column is always discarded (by label).
    # NOTE(review): presumably it is the row index written by DataFrame.to_csv -- confirm.
    leading_column = frame.columns[0]
    return frame.drop(columns=leading_column).to_numpy()

In [33]:
# Load the per-iteration results and list the columns that are available.
df = pd.read_csv("CS1_results_by_iter.csv", header = 0, index_col = None)
print(df.columns)

# Sort so the rows of one (method, separation factor, run) combination are contiguous.
# A new frame is assigned instead of sorting in place so the cell is safe to re-run.
df = df.sort_values(by=['Method', "Separation Factor", "Run"], axis=0)

print(df['Method'].unique())
print(df['Separation Factor'].unique())
print(df['Run'].unique())

# For every run, record the index label of the first row whose SSE matches the
# run's final "Minimum SSE" (the value stored on the row where Iteration == Max Eval).
# A single groupby pass replaces the nested loops over the cartesian product of
# unique values, and only visits combinations that actually exist in the data.
best_indecies = []
run_keys = ['Method', 'Separation Factor', 'Run']
for (meth, SF, run), run_rows in df.groupby(run_keys, sort=False):
    final_min_sse = run_rows.loc[run_rows['Iteration'] == run_rows['Max Eval'], 'Minimum SSE']
    if final_min_sse.empty:
        # Without a final row the best SSE of the run is unknown; fail loudly
        # rather than with an opaque broadcasting error.
        raise ValueError(
            f"No row with Iteration == Max Eval for Method={meth}, "
            f"Separation Factor={SF}, Run={run}"
        )
    # If several rows satisfy Iteration == Max Eval, the first one is used.
    sse_run_best_value = final_min_sse.iloc[0]

    # Tolerant comparison: the stored minimum may differ from the SSE column by round-off.
    is_best = np.isclose(run_rows['SSE'].to_numpy(), sse_run_best_value)
    if not is_best.any():
        raise ValueError(
            f"No SSE matches the final Minimum SSE for Method={meth}, "
            f"Separation Factor={SF}, Run={run}"
        )
    best_indecies.append(run_rows.index[is_best][0])
print(len(best_indecies))

Index(['Unnamed: 0', 'Method', 'Separation Factor', 'Run', 'Iteration', 'SSE',
       'Max Eval', 'Minimum SSE', 'Theta 1', 'Min Theta 1', 'Theta 2',
       'Min Theta 2', 'Time/Iter (Minutes)', 'Total Run Time'],
      dtype='object')
['1A' '1B' '2A' '2B' '2C']
[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
750


In [122]:

# Keep only the rows identified as each run's best-SSE row (original row order is preserved).
df_best_run_sse = df.loc[df.index.isin(best_indecies)]

names=df_best_run_sse['Method'].unique().tolist()

# For each method, print the percentage of runs at Separation Factor >= 1.0
# whose final minimum SSE is below 0.01 (one line per method, in the order of `names`).
for name in names:
    is_selected = (df_best_run_sse['Method'] == name) & (df_best_run_sse['Separation Factor'] >= 1.0)
    sparse_best_sses = df_best_run_sse.loc[is_selected, "Minimum SSE"].reset_index(drop=True)

    if len(sparse_best_sses) == 0:
        # No runs at this separation factor for the method: report NaN instead of dividing by zero.
        print(float("nan"))
        continue

    # Number of runs that did NOT reach the 0.01 SSE threshold.
    count = int((sparse_best_sses >= 0.01).sum())
    print(((1-(count/len(sparse_best_sses))))*100)

0.0
93.33333333333333
100.0
100.0
53.333333333333336


In [117]:
# Check for early termination and poor final predictions, per method,
# using one row per run (the best-SSE rows in df_best_run_sse).
Early_Term = []
Bad_Pred = []

names=df_best_run_sse['Method'].unique().tolist()

for name in names:
    df_meth = df_best_run_sse.loc[df_best_run_sse.Method==name]
    Last_iter_count = len(df_meth)

    if Last_iter_count == 0:
        # Keep both lists aligned with `names` even when a method has no rows.
        Bad_Pred.append(0)
        Early_Term.append(0)
        continue

    # A run is a "bad prediction" when its minimum SSE stayed above 1e-2.
    bad_pred_count = int((df_meth["Minimum SSE"] > 1e-2).sum())
    # A run terminated early when it used fewer than 100 evaluations.
    early_term_count = int((df_meth["Max Eval"] < 100).sum())

    Bad_Pred.append(bad_pred_count/Last_iter_count)
    Early_Term.append(early_term_count/Last_iter_count)

Good_Pred = (1-np.array(Bad_Pred))
print("Bad Pred:", np.array(Bad_Pred)*100)
print("Good Pred:", np.array(Good_Pred)*100)
print("Early Termination:",np.array(Early_Term)*100)

Bad Pred: [99.33333333  5.33333333  0.66666667  2.66666667 62.        ]
Good Pred: [ 0.66666667 94.66666667 99.33333333 97.33333333 38.        ]
Early Termination: [ 48.           4.66666667   0.           0.         100.        ]


In [116]:
# For each method, report the percentage of qualifying runs whose best parameter
# estimate lies close to the reference values Theta 1 = 1 and Theta 2 = -1.
names=df_best_run_sse['Method'].unique().tolist()

for name in names:
    df_meth = df_best_run_sse.loc[df_best_run_sse.Method==name]

    # Qualifying rows: Separation Factor >= 0.5 and the row's own theta values
    # are exactly the recorded minimising theta values.
    qualifies = (
        (df_meth["Separation Factor"] >= 0.5)
        & (df_meth["Min Theta 1"] == df_meth["Theta 1"])
        & (df_meth["Min Theta 2"] == df_meth["Theta 2"])
    )
    theta_1 = df_meth.loc[qualifies, "Min Theta 1"]
    theta_2 = df_meth.loc[qualifies, "Min Theta 2"]
    Last_iter_count = int(qualifies.sum())

    # Mean of the squared relative errors of both parameters, scaled by 100.
    percent_error = ((((theta_1-1)/1)**2 + ((theta_2+1)/-1)**2)/2)*100
    good_pred_count = int((percent_error <= 5.0).sum())

    if Last_iter_count != 0:
        good_pred_percent = good_pred_count/Last_iter_count
    else:
        # No qualifying rows: report NaN rather than reusing the previous
        # method's value (or raising NameError on the first method).
        good_pred_percent = float("nan")

    print(name, ":", np.round(good_pred_percent*100,3))

1A : 1.111
1B : 100.0
2A : 100.0
2B : 100.0
2C : 91.111


In [7]:
# Median-SSE row per method, computed over the best-SSE rows of every run.

# Unique method names, in order of appearance
names=df_best_run_sse['Method'].unique().tolist()

# One dataframe per method
df_list = [df_best_run_sse.loc[df_best_run_sse.Method==name] for name in names]

# For each method keep the row(s) whose SSE equals the median SSE.
# interpolation='nearest' makes quantile() return a value that is actually
# present in the column, so the equality test below selects real rows.
median_rows = []
for df_meth in df_list:
    median_sse = df_meth['SSE'].quantile(interpolation='nearest')
    median_rows.append(df_meth[df_meth['SSE']==median_sse])

# Concatenate once instead of growing a DataFrame inside the loop.
if median_rows:
    df_median = pd.concat(median_rows)
else:
    df_median = pd.DataFrame(columns=df_best_run_sse.columns)

#Drop unneeded columns
df_median = df_median.drop(["Run", "Time/Iter (Minutes)", "Min Theta 1", "Min Theta 2", "Unnamed: 0", "Total Run Time"], axis =1)

print(df_median)
df_median.to_csv("CS1_results_best_sse_median.csv", index=False)

      Method  Separation Factor  Iteration        SSE  Max Eval  Minimum SSE  \
2117      1A                0.4          5  14.474867        17    14.474867   
20576     1B                0.9         68   0.000181       100     0.000181   
34040     2A                0.8         32   0.002791       100     0.002791   
45017     2B                0.6          9   0.002619       100     0.002619   
52644     2C                0.5          2   0.279945         2     0.279945   

        Theta 1   Theta 2  
2117   1.396950 -1.618143  
20576  0.995873 -1.005198  
34040  0.994391 -0.994333  
45017  0.994747 -0.994625  
52644  0.887138 -0.933537  


In [8]:
# Find the median-SSE row per method over ALL iterations of ALL runs.
# Note: this cell reloads the CSV and rebinds `df` (sorted, without 'Unnamed: 0').
df = (
    pd.read_csv("CS1_results_by_iter.csv", header = 0)
    .sort_values(by=['Method', "Separation Factor", "Run"], axis=0)
    .drop(['Unnamed: 0'], axis =1)
)

names=df['Method'].unique().tolist()
# One dataframe per method
df_list = [df.loc[df.Method==name] for name in names]

# For each method keep the row(s) whose SSE equals the median SSE.
# interpolation='nearest' makes quantile() return a value that is actually
# present in the column, so the equality test below selects real rows.
median_rows = []
for df_meth in df_list:
    median_sse = df_meth['SSE'].quantile(interpolation='nearest')
    median_rows.append(df_meth[df_meth['SSE']==median_sse])

# Concatenate once instead of growing a DataFrame inside the loop.
if median_rows:
    df_median = pd.concat(median_rows)
else:
    df_median = pd.DataFrame(columns=df.columns)

# Drop unneeded columns
df_median = df_median.drop(["Run", "Time/Iter (Minutes)", "Min Theta 1", "Min Theta 2", "Total Run Time"], axis =1)

print(df_median)
df_median.to_csv("CS1_results_true_median.csv", index=False)

# print(df.head())

      Method  Separation Factor  Iteration        SSE  Max Eval  Minimum SSE  \
7718      1A                1.0         34  11.301864       100    11.301864   
8353      1B                0.1         69   0.005777       100     0.000855   
28934     2A                0.5         26   0.003013       100     0.000771   
40847     2B                0.3         39   0.002895       100     0.002710   
52669     2C                0.5          5   0.198062         8     0.198062   

        Theta 1   Theta 2  
7718   1.042161 -1.579298  
8353   1.005438 -1.014776  
28934  0.993380 -0.993982  
40847  0.993693 -0.994164  
52669  0.865306 -0.972494  


In [9]:
# Check for early termination and poor final predictions, per method,
# using the final row of every run (the row where Iteration == Max Eval).
Early_Term = []
Bad_Pred = []

names=df['Method'].unique().tolist()

for name in names:
    df_meth = df.loc[df.Method==name]

    # Only the last iteration of each run carries the run's final result.
    final_rows = df_meth.loc[df_meth["Iteration"] == df_meth["Max Eval"]]
    Last_iter_count = len(final_rows)

    if Last_iter_count == 0:
        # Keep both lists aligned with `names` even when a method has no final rows.
        Bad_Pred.append(0)
        Early_Term.append(0)
        continue

    # A run is a "bad prediction" when its minimum SSE stayed above 1e-2.
    bad_pred_count = int((final_rows["Minimum SSE"] > 1e-2).sum())
    # A run terminated early when it did not use exactly 100 evaluations.
    early_term_count = int((final_rows["Max Eval"] != 100).sum())

    Bad_Pred.append(bad_pred_count/Last_iter_count)
    Early_Term.append(early_term_count/Last_iter_count)

Good_Pred = (1-np.array(Bad_Pred))
print("Bad Pred:", np.array(Bad_Pred)*100)
print("Good Pred:", np.array(Good_Pred)*100)
print("Early Termination:",np.array(Early_Term)*100)

Bad Pred: [99.33333333  5.33333333  0.66666667  2.66666667 62.        ]
Good Pred: [ 0.66666667 94.66666667 99.33333333 97.33333333 38.        ]
Early Termination: [ 48.           4.66666667   0.           0.         100.        ]
