In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from os.path import exists
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

from data.get_uci import all_datasets
from analysis.util import fetch, init_uci_dict, get_uci_info

In [2]:
filters = {
    "group": "benchmark-noworkers"
}
raw_orig = fetch("softki", filters)

100%|██████████| 144/144 [00:57<00:00,  2.52it/s]


In [3]:
filters = {
    "group": "benchmark-noworkers-nohutch"
}
raw = fetch("softki", filters)

100%|██████████| 36/36 [00:15<00:00,  2.31it/s]


In [4]:
uci_info = get_uci_info()

In [5]:
uci_dict = {}
for exp in raw:
    model = exp.config["model.name"]
    dataset = exp.config["dataset.name"]
    if model == "exact":
        continue
    num_inducing = exp.config["model.num_inducing"]
    dtype = exp.config["model.dtype"]
    seed = exp.config["training.seed"]
    train_frac = float(exp.config["dataset.train_frac"])
    if (dataset, seed, num_inducing, train_frac, model) in uci_dict:
        print("FAIL", (dataset, seed, num_inducing, train_frac, model))
    uci_dict[(dataset, seed, num_inducing, train_frac, model)] = exp.history

for exp in raw_orig:
    model = exp.config["model.name"]
    dataset = exp.config["dataset.name"]
    if model == "exact":
        continue
    num_inducing = exp.config["model.num_inducing"]
    dtype = exp.config["model.dtype"]
    seed = exp.config["training.seed"]
    train_frac = float(exp.config["dataset.train_frac"])
    if (dataset, seed, num_inducing, train_frac, model+"-hutch") in uci_dict:
        print("FAIL", (dataset, seed, num_inducing, train_frac, model+"-hutch"))
    uci_dict[(dataset, seed, num_inducing, train_frac, model+"-hutch")] = exp.history

In [6]:
seeds = [6535, 8830, 92357]
num_inducings = [512, 1024]
KZZ = {}
all_bins = {}
fracs = [0.9]
tmp = {
    "N": [int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
}
models = ["soft-gp", "soft-gp-hutch"]

for seed in seeds:
    for model in models:
        for num_inducing in num_inducings:
            for frac in fracs:
                xs = []
                ts = []
                for dataset, _, _, _ in uci_info:
                    try:
                        xs += [uci_dict[(dataset, seed, num_inducing, frac, model)]["test_rmse"][49]]
                        ts += [np.array(uci_dict[(dataset, seed, num_inducing, frac, model)]["epoch_time"][49]).mean()]
                    except Exception as e:
                        xs += [np.nan]
                        ts += [np.nan]
                        print("Exception", e, model, dataset)

                tmp[f"{model}-{num_inducing}-{frac}-{seed}"] = xs
                tmp[f"time-{model}-{num_inducing}-{frac}-{seed}"] = ts

df = pd.DataFrame(data=tmp)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Exception 49 soft-gp protein
Exception 49 soft-gp keggundirected
Exception 'test_rmse' soft-gp 3droad
Exception 49 soft-gp houseelectric
Exception ('pol', 6535, 1024, 0.9, 'soft-gp') soft-gp pol
Exception ('elevators', 6535, 1024, 0.9, 'soft-gp') soft-gp elevators
Exception ('bike', 6535, 1024, 0.9, 'soft-gp') soft-gp bike
Exception ('kin40k', 6535, 1024, 0.9, 'soft-gp') soft-gp kin40k
Exception ('protein', 6535, 1024, 0.9, 'soft-gp') soft-gp protein
Exception ('keggdirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggdirected
Exception ('slice', 6535, 1024, 0.9, 'soft-gp') soft-gp slice
Exception ('keggundirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggundirected
Exception ('3droad', 6535, 1024, 0.9, 'soft-gp') soft-gp 3droad
Exception ('song', 6535, 1024, 0.9, 'soft-gp') soft-gp song
Exception ('buzz', 6535, 1024, 0.9, 'soft-gp') soft-gp buzz
Exception ('houseelectric', 6535, 1024, 0.9, 'soft-gp') soft-gp houseelectric
Exception ('pol', 6535, 1024, 0.9, 'soft-gp-hutch') soft-gp-hutch

Unnamed: 0,N,D,soft-gp-512-0.9-6535,time-soft-gp-512-0.9-6535,soft-gp-1024-0.9-6535,time-soft-gp-1024-0.9-6535,soft-gp-hutch-512-0.9-6535,time-soft-gp-hutch-512-0.9-6535,soft-gp-hutch-1024-0.9-6535,time-soft-gp-hutch-1024-0.9-6535,...,soft-gp-hutch-1024-0.9-8830,time-soft-gp-hutch-1024-0.9-8830,soft-gp-512-0.9-92357,time-soft-gp-512-0.9-92357,soft-gp-1024-0.9-92357,time-soft-gp-1024-0.9-92357,soft-gp-hutch-512-0.9-92357,time-soft-gp-hutch-512-0.9-92357,soft-gp-hutch-1024-0.9-92357,time-soft-gp-hutch-1024-0.9-92357
Pol,13500,26,0.18655,0.928962,,,0.189091,0.876965,,,...,,,0.19292,0.845349,,,0.194141,0.822344,,
Elevators,14939,18,0.39142,0.969514,,,0.391091,0.988241,,,...,,,0.378425,0.922128,,,0.377992,1.031611,,
Bike,15641,17,0.205804,1.031143,,,0.207342,0.969027,,,...,,,0.196118,1.078124,,,0.197694,0.956885,,
Kin40k,36000,8,0.240425,2.237784,,,0.242823,2.358266,,,...,,,0.239835,2.364171,,,0.240351,2.458695,,
Protein,41157,9,,,,,0.656481,2.508594,,,...,,,,,,,0.652692,2.794031,,
Keggdirected,43944,20,0.079154,2.879887,,,0.07885,2.911889,,,...,,,0.085934,3.190022,,,0.086857,2.808066,,
Slice,48150,385,0.021478,3.672297,,,0.051091,3.819275,,,...,,,0.030007,3.684675,,,0.049954,3.903771,,
Keggundirected,57247,27,,,,,0.116044,3.655165,,,...,,,,,,,0.118493,3.467353,,
3droad,391386,3,,,,,0.605115,24.565613,,,...,,,,,,,0.605276,25.880858,,
Song,270000,90,0.797257,19.480093,,,0.798795,19.215702,,,...,,,0.78887,19.176009,,,0.790789,19.571849,,


# Plot RMSE

In [7]:
def pm_var(df, model):
    # print(df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].mean(axis=1))
    m = df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].mean(axis=1).round(3).astype(str)
    v = df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].std(axis=1).round(3).astype(str).apply(lambda x: f" $\pm$ {x}")
    return (m + v).apply(lambda x: x.replace("nan $\pm$ nan", "-"))
df_rmse = pd.DataFrame()
df_rmse[['N', 'D']] = df[['N', 'D']]
for model in ["soft-gp-512", "soft-gp-hutch-512"]:
    for frac in [0.9]:
        df_rmse[f'{model}-{frac}'] = pm_var(df, f'{model}-{frac}')

df_rmse

Unnamed: 0,N,D,soft-gp-512-0.9,soft-gp-hutch-512-0.9
Pol,13500,26,0.193 $\pm$ 0.006,0.195 $\pm$ 0.006
Elevators,14939,18,0.389 $\pm$ 0.01,0.389 $\pm$ 0.01
Bike,15641,17,0.203 $\pm$ 0.006,0.204 $\pm$ 0.006
Kin40k,36000,8,0.24 $\pm$ 0.0,0.237 $\pm$ 0.008
Protein,41157,9,-,0.649 $\pm$ 0.01
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.081 $\pm$ 0.005
Slice,48150,385,0.024 $\pm$ 0.005,0.049 $\pm$ 0.003
Keggundirected,57247,27,-,0.115 $\pm$ 0.004
3droad,391386,3,-,0.605 $\pm$ 0.0
Song,270000,90,0.793 $\pm$ 0.004,0.793 $\pm$ 0.005


In [8]:
df2 = df_rmse[["N", "D", "soft-gp-hutch-512-0.9", "soft-gp-512-0.9"]]
df2 = df2.sort_values(by=['D'], ascending=[True])
df2

Unnamed: 0,N,D,soft-gp-hutch-512-0.9,soft-gp-512-0.9
3droad,391386,3,0.605 $\pm$ 0.0,-
Kin40k,36000,8,0.237 $\pm$ 0.008,0.24 $\pm$ 0.0
Protein,41157,9,0.649 $\pm$ 0.01,-
Houseelectric,1844352,11,0.064 $\pm$ 0.001,-
Bike,15641,17,0.204 $\pm$ 0.006,0.203 $\pm$ 0.006
Elevators,14939,18,0.389 $\pm$ 0.01,0.389 $\pm$ 0.01
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.081 $\pm$ 0.005
Pol,13500,26,0.195 $\pm$ 0.006,0.193 $\pm$ 0.006
Keggundirected,57247,27,0.115 $\pm$ 0.004,-
Buzz,524925,77,0.254 $\pm$ 0.0,0.248 $\pm$ 0.001


In [9]:
latex_table = df2.to_latex(
    index=True,
    escape=False,
    float_format="{:0.3f}".format,
)
print(latex_table)

\begin{tabular}{lrrll}
\toprule
 & N & D & soft-gp-hutch-512-0.9 & soft-gp-512-0.9 \\
\midrule
3droad & 391386 & 3 & 0.605 $\pm$ 0.0 & - \\
Kin40k & 36000 & 8 & 0.237 $\pm$ 0.008 & 0.24 $\pm$ 0.0 \\
Protein & 41157 & 9 & 0.649 $\pm$ 0.01 & - \\
Houseelectric & 1844352 & 11 & 0.064 $\pm$ 0.001 & - \\
Bike & 15641 & 17 & 0.204 $\pm$ 0.006 & 0.203 $\pm$ 0.006 \\
Elevators & 14939 & 18 & 0.389 $\pm$ 0.01 & 0.389 $\pm$ 0.01 \\
Keggdirected & 43944 & 20 & 0.081 $\pm$ 0.005 & 0.081 $\pm$ 0.005 \\
Pol & 13500 & 26 & 0.195 $\pm$ 0.006 & 0.193 $\pm$ 0.006 \\
Keggundirected & 57247 & 27 & 0.115 $\pm$ 0.004 & - \\
Buzz & 524925 & 77 & 0.254 $\pm$ 0.0 & 0.248 $\pm$ 0.001 \\
Song & 270000 & 90 & 0.793 $\pm$ 0.005 & 0.793 $\pm$ 0.004 \\
Slice & 48150 & 385 & 0.049 $\pm$ 0.003 & 0.024 $\pm$ 0.005 \\
\bottomrule
\end{tabular}

