## Table 1 Work

In [30]:
import wandb
import pandas as pd
import math

# Single API handle shared by every query in this notebook.
api = wandb.Api()

# Both queries target the same project and `which_table == "1"`; only the tag differs.
runs_table1_3k = api.runs(
    path="bogp/exp-gpr-src_benchmarks",
    filters={
        "$and": [
            {"tags": "table1-gq-attempt3"},
            {"config.which_table": "1"},
        ]
    },
)
runs_table1_10k = api.runs(
    path="bogp/exp-gpr-src_benchmarks",
    filters={
        "$and": [
            {"tags": "table1-gq-10k-points"},
            {"config.which_table": "1"},
        ]
    },
)

In [36]:
# ** Export to DataFrame **
'''
We're using 3000 train and test points
'''
def get_run_df(runs_table):
    """Collect summary, config and name for every run in ``runs_table``.

    Despite the name, this returns three parallel lists rather than a
    DataFrame; the caller assembles the frame.

    Args:
        runs_table: iterable of run objects exposing ``summary._json_dict``,
            ``config`` (a mapping) and ``name``.

    Returns:
        Tuple ``(summaries, configs, names)`` with one entry per run, in
        iteration order.
    """
    summaries = []
    configs = []
    names = []
    for run in runs_table:
        # `_json_dict` holds the logged metric keys/values while omitting large files.
        summaries.append(run.summary._json_dict)

        # Keep only hyperparameters; keys starting with an underscore are special values.
        public_config = {}
        for key, value in run.config.items():
            if key.startswith('_'):
                continue
            public_config[key] = value
        configs.append(public_config)

        # Human-readable run name.
        names.append(run.name)
    return summaries, configs, names

# Pull the per-run records for each of the two run collections.
summary_list_3k, config_list_3k, name_list_3k = get_run_df(runs_table1_3k)
summary_list_10k, config_list_10k, name_list_10k = get_run_df(runs_table1_10k)

# One row per run; column order is summary, config, name.
runs_df_3k = pd.DataFrame(
    dict(summary=summary_list_3k, config=config_list_3k, name=name_list_3k)
)

runs_df_10k = pd.DataFrame(
    dict(summary=summary_list_10k, config=config_list_10k, name=name_list_10k)
)

In [38]:
# Spot-check: final RMSE recorded in the summary of the run at index label 28.
runs_df_3k.loc[28, 'summary']['final_RMSE']

0.02939382940530777

In [37]:
# Spot-check: final RMSE recorded in the summary of the run at index label 28.
runs_df_10k.loc[28, 'summary']['final_RMSE']

0.030289657413959503

In [40]:


#Fix dataframe
# NOTE(review): `df` is not read by any cell visible here; it looks like a
# leftover placeholder — confirm before removing.
df = pd.DataFrame()
#Adding our data to the table option
def get_table(runs_df):
    """Flatten a runs frame into one record per run, in reverse row order.

    Args:
        runs_df: DataFrame with a ``summary`` column (mapping of logged
            metrics) and a ``config`` column (mapping containing at least
            ``file`` and ``method``).

    Returns:
        List of dicts with keys ``file``, ``method``, ``RMSE`` and ``NLL``.
        A run whose summary lacks ``final_RMSE`` is treated as unfinished and
        gets ``'-'`` for both metrics. A finished run that lacks
        ``final_NLL`` gets ``'-'`` for ``NLL`` only. The list is reversed
        relative to the row order of ``runs_df``.
    """
    placeholder = '-'
    run_data = []
    # Iterate the columns positionally: label lookups such as
    # runs_df['summary'][i] break as soon as the frame has a non-default
    # index (for example after filtering).
    for summary, config in zip(runs_df['summary'], runs_df['config']):
        finished = 'final_RMSE' in summary
        if finished:
            rmse = summary['final_RMSE']
            nll = summary['final_NLL'] if 'final_NLL' in summary else placeholder
        else:
            rmse = placeholder
            nll = placeholder
        run_data.append({
            'file': config['file'],
            'method': config['method'],
            'RMSE': rmse,
            'NLL': nll,
        })
    run_data.reverse()
    return run_data

#delete this
# Flatten each runs frame into one record per run and wrap it in a DataFrame.
# The 3k table is built from `runs_df_3k`: the bare name `runs_df` is not
# defined in any visible cell and raises NameError on a fresh kernel.
run_data_3k = get_table(runs_df_3k)
run_data_10k = get_table(runs_df_10k)
table1_3k = pd.DataFrame(run_data_3k)
table1_10k = pd.DataFrame(run_data_10k)
table1_3k

Unnamed: 0,file,method,RMSE,NLL
0,Branin,SVGP,0.008698,-2.82202
1,SixHumpCamel,SVGP,0.028459,-2.106068
2,StyblinskiTang,SVGP,0.015086,-2.467119
3,SinCos,SVGP,0.002287,-3.234912
4,Hartmann,SVGP,0.148467,-0.300784
5,Welch2,SVGP,0.173335,-0.308071
6,Branin,PPGPR,0.008716,-3.004794
7,SixHumpCamel,PPGPR,0.029065,-2.714647
8,StyblinskiTang,PPGPR,0.043768,-2.628992
9,SinCos,PPGPR,0.001456,-3.298127


In [9]:
# Display the flattened per-run table built from `runs_df_10k`.
table1_10k

Unnamed: 0,file,method,RMSE,NLL
0,Branin,SVGP,0.001548,-3.11868
1,SixHumpCamel,SVGP,0.006268,-2.904613
2,StyblinskiTang,SVGP,0.003221,-3.025558
3,SinCos,SVGP,0.000859,-3.276683
4,Hartmann,SVGP,0.101277,-0.652879
5,Welch2,SVGP,0.151082,-0.462707
6,Branin,PPGPR,0.001783,-3.209666
7,SixHumpCamel,PPGPR,0.007312,-3.075205
8,StyblinskiTang,PPGPR,0.005177,-3.166855
9,SinCos,PPGPR,0.00095,-3.305687


In [42]:
import itertools

def plot_table(table1, run_data):
    """Build one single-row, two-level-column frame per method.

    Each returned frame has the method name as its only index label and a
    ``(file, metric)`` MultiIndex as columns, with ``RMSE`` followed by
    ``NLL`` for every file in the order the rows appear in ``table1``.
    Concatenating the frames gives the wide "method x (file, metric)" table.

    Rows are grouped by their ``method`` value instead of being cut into
    fixed-size chunks, so a method with fewer (or more) rows than the others
    is labelled correctly rather than raising or shifting values under the
    wrong method.

    Args:
        table1: DataFrame with ``file``, ``method``, ``RMSE`` and ``NLL``
            columns, one row per (file, method) result.
        run_data: Unused; kept so existing callers keep working.

    Returns:
        List of DataFrames, one per method in order of first appearance.
        Empty when ``table1`` has no rows.
    """
    df_collection = []
    # sort=False keeps the methods in order of first appearance.
    for method, method_rows in table1.groupby('method', sort=False):
        column_keys = []
        values = []
        cells = zip(method_rows['file'], method_rows['RMSE'], method_rows['NLL'])
        for file_name, rmse, nll in cells:
            column_keys.append((file_name, 'RMSE'))
            values.append(rmse)
            column_keys.append((file_name, 'NLL'))
            values.append(nll)
        # Build a single column and transpose it, so that a row mixing floats
        # with the '-' placeholder keeps one common dtype.
        frame = pd.DataFrame(
            values,
            columns=pd.Index([method]),
            index=pd.MultiIndex.from_tuples(column_keys),
        ).transpose()
        df_collection.append(frame)

    return df_collection
# Per-method single-row frames for each run collection.
df_collection_3k = plot_table(table1=table1_3k, run_data=run_data_3k)
df_collection_10k = plot_table(table1=table1_10k, run_data=run_data_10k)

# Stack the per-method rows into one wide table per collection.
mix_3k = pd.concat(objs=df_collection_3k)
mix_10k = pd.concat(objs=df_collection_10k)
mix_3k


Unnamed: 0_level_0,Branin,Branin,SixHumpCamel,SixHumpCamel,StyblinskiTang,StyblinskiTang,SinCos,SinCos,Hartmann,Hartmann,Welch2,Welch2
Unnamed: 0_level_1,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL
SVGP,0.008698,-2.82202,0.028459,-2.106068,0.015086,-2.467119,0.002287,-3.234912,0.148467,-0.300784,0.173335,-0.308071
PPGPR,0.008716,-3.004794,0.029065,-2.714647,0.043768,-2.628992,0.001456,-3.298127,0.371633,-0.902324,0.176301,-0.422997
GradGP,0.016257,-0.961477,0.029394,0.252653,0.047227,-0.873611,0.991578,1.56548,0.027042,-0.843867,-,-
GradSVGP,0.002802,-2.296718,-,-,0.01265,-0.727984,0.709389,1.452849,0.1783,0.825147,-,-
GradPPGPR,0.006883,-3.034069,0.066705,-2.610328,0.018822,-2.708077,0.781283,1.172164,0.389878,-1.130588,-,-
DSVGP2,0.005233,-2.261976,0.021432,-0.233119,0.023727,-1.258274,0.708664,1.451538,0.090765,0.127237,0.063235,-0.715209
DPPGPR2,0.013321,-3.055881,0.024325,-2.906329,0.020265,-2.688847,0.779719,1.170146,0.252212,-1.634004,0.218697,-0.332293


In [43]:
# Display the wide table obtained by concatenating `df_collection_10k`.
mix_10k

Unnamed: 0_level_0,Branin,Branin,SixHumpCamel,SixHumpCamel,StyblinskiTang,StyblinskiTang,SinCos,SinCos,Hartmann,Hartmann,Welch2,Welch2
Unnamed: 0_level_1,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL,RMSE,NLL
SVGP,0.001548,-3.11868,0.006268,-2.904613,0.003221,-3.025558,0.000859,-3.276683,0.101277,-0.652879,0.151082,-0.462707
PPGPR,0.001783,-3.209666,0.007312,-3.075205,0.005177,-3.166855,0.00095,-3.305687,0.307745,-1.263907,0.174456,-0.713637
GradGP,0.012305,-0.949366,0.03029,-0.208172,0.0917,-0.825225,0.998344,1.586895,-,-,-,-
GradSVGP,0.000384,-3.172409,0.004515,-1.954918,0.002751,-2.676576,0.706133,1.434846,0.059793,0.033268,-,-
GradPPGPR,0.001076,-3.326198,0.034676,-3.055859,0.003386,-3.297071,0.776052,1.165382,0.279265,-1.951382,-,-
DSVGP2,-,-,-,-,-,-,-,-,-,-,-,-
DPPGPR2,-,-,-,-,-,-,-,-,-,-,-,-


## Figure 1 Work

The train and test sets both use 3k points; only the number of inducing points varies.

Data SinCos: inducing numbers are 200, 400, 800, 1200

Data Hartmann: inducing numbers are 200, 400, 800, 1600, 3200

In [None]:
def _table1_runs_tagged(tag):
    """Query the benchmark project for `which_table == "1"` runs carrying `tag`."""
    return api.runs(
        path="bogp/exp-gpr-src_benchmarks",
        filters={"$and": [{"tags": tag}, {"config.which_table": "1"}]},
    )


# NOTE(review): every handle below reuses one of the two Table 1 tags, so the
# queries are not distinguished by the number in the variable name. These look
# like placeholders awaiting per-setting tags — confirm before using the results.
runs_table1_200 = _table1_runs_tagged("table1-gq-attempt3")
runs_table1_400 = _table1_runs_tagged("table1-gq-10k-points")
runs_table1_800 = _table1_runs_tagged("table1-gq-attempt3")
runs_table1_1200 = _table1_runs_tagged("table1-gq-10k-points")
runs_table1_1600 = _table1_runs_tagged("table1-gq-10k-points")
runs_table1_3200 = _table1_runs_tagged("table1-gq-10k-points")

In [1]:
from typing import List
from functools import reduce

# Sentinel distinguishing "no initial value supplied" from a legitimate None.
_NO_INITIAL = object()


def map_reduce(data: List[int], map_func, reduce_func, initial=_NO_INITIAL):
    """Apply ``map_func`` to every item, then fold the results with ``reduce_func``.

    Args:
        data: Items to process. The annotation is kept for compatibility;
            any iterable accepted by ``map`` works.
        map_func: One-argument callable applied to each item.
        reduce_func: Two-argument callable ``(accumulator, item)`` used to
            combine the mapped items from left to right.
        initial: Optional starting accumulator. When given, it is also the
            result for empty ``data``.

    Returns:
        The folded value.

    Raises:
        TypeError: If ``data`` is empty and ``initial`` is not supplied.
    """
    # Map everything before reducing, so all map_func calls happen first.
    mapped_data = list(map(map_func, data))
    if initial is _NO_INITIAL:
        return reduce(reduce_func, mapped_data)
    return reduce(reduce_func, mapped_data, initial)

# Input data
# The integers 1 through 5.
data = list(range(1, 6))

# Map function
def square(x):
    """Return ``x`` multiplied by itself."""
    product = x * x
    return product

# Reduce function
def sum_of_squares(x, y):
    """Return ``x + y``.

    The function only adds its two arguments; any squaring has to happen
    before the values are passed in.
    """
    total = x + y
    return total

# Perform MapReduce
# Square each element, then fold the squares together with addition.
result = map_reduce(data=data, map_func=square, reduce_func=sum_of_squares)

print("Result:", result)

Result: 55
