In [1]:
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from matplotlib import pyplot as plt
import sys

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

import bo_methods_lib
from bo_methods_lib.bo_functions_generic import gen_theta_set

from bo_methods_lib.CS2_bo_plotters import plot_obj_abs_min, value_plotter, plot_obj, plot_Theta, plot_Theta_min,path_name, plot_org_train, save_fig, plot_EI_abs_max

from bo_methods_lib.CS2_bo_functions_multi_dim import set_ep

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# See also numpy.loadtxt: https://numpy.org/doc/stable/reference/generated/numpy.loadtxt.html (the helper below uses pandas.read_csv instead)

def csv_to_array(csv_path):
    """
    Load a CSV file and return its contents, minus the first column, as a numpy array.

    Parameters:
    -----------
        csv_path: str, path of the CSV file to read (passed straight to pd.read_csv)
    Returns:
    --------
        csv_array: np.ndarray, values of every column in the CSV except the first one
    """
    # index_col=False: do not use any CSV column as the DataFrame index.
    frame = pd.read_csv(csv_path, index_col=False)

    # Discard the leading column (presumably a saved row index -- TODO confirm).
    leading_column = frame.columns[0]
    trimmed = frame.drop(columns=[leading_column])

    return trimmed.to_numpy()

In [3]:
# Load the per-iteration results; the first CSV column becomes the initial index.
df = pd.read_csv("CS1_results_by_iter.csv", header=0, index_col=0)

# Re-index by method while keeping 'Method' available as a regular column.
df = df.set_index(keys=['Method'], drop=False)

# Unique method labels, in order of first appearance.
names = df['Method'].unique().tolist()

# Build one DataFrame per method, each without the first column of `df`
# (presumably the now-redundant 'Method' column -- TODO confirm).
df_list = []
for name in names:
    belongs_to_method = df['Method'] == str(name)
    df_item = df[belongs_to_method].drop(columns=df.columns[0])
    df_list.append(df_item)

# Thresholds used to classify the final iteration of each run.
SSE_BAD_PRED_THRESHOLD = 1e-2  # a final "Minimum SSE" above this counts as a bad prediction
FULL_EVAL_BUDGET = 100         # a run whose "Max Eval" differs from this counts as early-terminated

# Per-method fractions (one entry per frame in df_list, in the same order):
#   Bad_Pred   -- share of final-iteration rows with Minimum SSE above the threshold
#   Early_Term -- share of final-iteration rows whose Max Eval is not the full budget
Early_Term = []
Bad_Pred = []
for method_df in df_list:
    # Rows where the iteration number equals the run's "Max Eval", i.e. the last row of a run.
    # Comparing the raw arrays is purely positional, so the repeated 'Method'
    # index labels play no role (the old `series[i]` lookup relied on the
    # deprecated positional fallback of Series.__getitem__).
    is_final = method_df["Iteration"].to_numpy() == method_df["Max Eval"].to_numpy()
    final_rows = method_df.loc[is_final]

    Last_iter_count = len(final_rows)
    bad_pred_count = int((final_rows["Minimum SSE"].to_numpy() > SSE_BAD_PRED_THRESHOLD).sum())
    early_term_count = int((final_rows["Max Eval"].to_numpy() != FULL_EVAL_BUDGET).sum())

    print(bad_pred_count, Last_iter_count, early_term_count)

    if Last_iter_count != 0:
        Bad_Pred.append(bad_pred_count / Last_iter_count)
        Early_Term.append(early_term_count / Last_iter_count)
    else:
        # No final-iteration rows for this method: report 0 for both fractions
        # instead of dividing by zero (previously only Bad_Pred was guarded).
        Bad_Pred.append(0)
        Early_Term.append(0)
    
        
    

149 150 72
8 150 7
1 150 0
4 150 0
93 150 150


In [4]:
# Show the per-method fractions as percentages (one entry per frame in df_list).
print(100 * np.array(Bad_Pred))
print(100 * np.array(Early_Term))

[99.33333333  5.33333333  0.66666667  2.66666667 62.        ]
[ 48.           4.66666667   0.           0.         100.        ]


In [6]:
# For every method, keep the row(s) whose SSE equals the median SSE of that method.
# quantile() defaults to q=0.5; interpolation='nearest' makes it return a value
# that actually occurs in the column, so the equality test matches at least one row
# (several rows if the median value is tied).
median_rows = [
    method_df[method_df['SSE'] == method_df['SSE'].quantile(interpolation='nearest')]
    for method_df in df_list
]

# Concatenate once instead of growing a frame inside the loop: this avoids the
# quadratic copying and the pandas warning about concatenating an empty DataFrame.
df_median = pd.concat(median_rows)

# Drop the columns that are not wanted in the summary table.
df_median = df_median.drop(columns=["Run", "Time/Iter (Minutes)", "Min Theta 1", "Min Theta 2"])

print(df_median)

        Separation Factor  Iteration        SSE  Max Eval  Minimum SSE  \
Method                                                                   
1A                    1.0         34  11.301864       100    11.301864   
1B                    0.1         69   0.005777       100     0.000855   
2A                    0.5         26   0.003013       100     0.000771   
2B                    0.3         39   0.002895       100     0.002710   
2C                    0.5          5   0.198062         8     0.198062   

         Theta 1   Theta 2  Total Run Time  
Method                                      
1A      1.042161 -1.579298            5.47  
1B      1.005438 -1.014776           10.00  
2A      0.993380 -0.993982           39.20  
2B      0.993693 -0.994164          436.00  
2C      0.865306 -0.972494            4.92  


In [7]:
# Persist the median-run summary (the index is written as the first CSV column).
df_median.to_csv(path_or_buf="CS1_results_median.csv")