In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from os.path import exists
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

from data.get_uci import all_datasets
from analysis.util import fetch, init_uci_dict, get_uci_info

In [2]:
# Only runs tagged with this group are requested.
# NOTE(review): fetch comes from analysis.util; "softki" is presumably the
# project/collection to query — confirm against that helper.
filters = dict(group="benchmark-noworkers")
raw = fetch("softki", filters)

100%|██████████| 144/144 [00:54<00:00,  2.62it/s]


In [3]:
# Dataset metadata used by the cells below, which unpack every entry as a
# 4-tuple: (dataset name, N, D, value stored under "exact-rep").
# NOTE(review): N and D are presumably sample count and input dimension — confirm.
uci_info = get_uci_info()

In [4]:
# Index each run's history by (dataset, seed, num_inducing, train_frac, model),
# leaving out the runs whose model is "exact".
uci_dict = {}
for exp in raw:
    cfg = exp.config
    model, dataset = cfg["model.name"], cfg["dataset.name"]
    if model != "exact":
        num_inducing = cfg["model.num_inducing"]
        dtype = cfg["model.dtype"]
        seed = cfg["training.seed"]
        train_frac = float(cfg["dataset.train_frac"])
        key = (dataset, seed, num_inducing, train_frac, model)
        # Two runs sharing a configuration are reported; the later one wins.
        if key in uci_dict:
            print("FAIL", key)
        uci_dict[key] = exp.history

In [5]:
# Experiment grid: one RMSE column and one "time-" column is produced for every
# (model, num_inducing, frac, seed) combination, with one row per dataset.
seeds = [6535, 8830, 92357]
num_inducings = [512, 1024]
KZZ = {}
all_bins = {}
fracs = [0.9]
models = ["soft-gp", "svi-gp", "sv-gp"]
# Position read from each per-epoch history list.
# NOTE(review): presumably the last epoch of a 50-epoch run — confirm.
epoch_idx = 49
tmp = {
    # NOTE(review): N * 0.9 presumably mirrors the 0.9 train fraction in fracs — confirm.
    "N": [int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
}

for seed in seeds:
    for model in models:
        for num_inducing in num_inducings:
            for frac in fracs:
                xs = []
                ts = []
                for dataset, _, _, _ in uci_info:
                    # Resolve both metrics before appending anything: if only the
                    # second lookup fails, appending inside the try would leave xs
                    # one element longer than ts and misalign the table rows.
                    try:
                        history = uci_dict[(dataset, seed, num_inducing, frac, model)]
                        rmse = history["test_rmse"][epoch_idx]
                        epoch_time = np.array(history["epoch_time"][epoch_idx]).mean()
                    except Exception as e:
                        # Missing or incomplete runs become NaN so the row layout
                        # stays aligned with uci_info.
                        rmse = np.nan
                        epoch_time = np.nan
                        print("Exception", e, model, dataset)
                    xs.append(rmse)
                    ts.append(epoch_time)

                tmp[f"{model}-{num_inducing}-{frac}-{seed}"] = xs
                tmp[f"time-{model}-{num_inducing}-{frac}-{seed}"] = ts

df = pd.DataFrame(data=tmp)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Exception ('pol', 6535, 1024, 0.9, 'soft-gp') soft-gp pol
Exception ('elevators', 6535, 1024, 0.9, 'soft-gp') soft-gp elevators
Exception ('bike', 6535, 1024, 0.9, 'soft-gp') soft-gp bike
Exception ('kin40k', 6535, 1024, 0.9, 'soft-gp') soft-gp kin40k
Exception ('protein', 6535, 1024, 0.9, 'soft-gp') soft-gp protein
Exception ('keggdirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggdirected
Exception ('slice', 6535, 1024, 0.9, 'soft-gp') soft-gp slice
Exception ('keggundirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggundirected
Exception ('3droad', 6535, 1024, 0.9, 'soft-gp') soft-gp 3droad
Exception ('song', 6535, 1024, 0.9, 'soft-gp') soft-gp song
Exception ('buzz', 6535, 1024, 0.9, 'soft-gp') soft-gp buzz
Exception ('houseelectric', 6535, 1024, 0.9, 'soft-gp') soft-gp houseelectric
Exception 'test_rmse' sv-gp 3droad
Exception 'test_rmse' sv-gp song
Exception 'test_rmse' sv-gp buzz
Exception 'test_rmse' sv-gp houseelectric
Exception ('pol', 6535, 1024, 0.9, 'sv-gp') sv-gp pol
Excep

Unnamed: 0,N,D,soft-gp-512-0.9-6535,time-soft-gp-512-0.9-6535,soft-gp-1024-0.9-6535,time-soft-gp-1024-0.9-6535,svi-gp-512-0.9-6535,time-svi-gp-512-0.9-6535,svi-gp-1024-0.9-6535,time-svi-gp-1024-0.9-6535,...,soft-gp-1024-0.9-92357,time-soft-gp-1024-0.9-92357,svi-gp-512-0.9-92357,time-svi-gp-512-0.9-92357,svi-gp-1024-0.9-92357,time-svi-gp-1024-0.9-92357,sv-gp-512-0.9-92357,time-sv-gp-512-0.9-92357,sv-gp-1024-0.9-92357,time-sv-gp-1024-0.9-92357
Pol,13500,26,0.189091,0.876965,,,0.31135,1.005594,0.29388,1.28605,...,,,0.321628,1.032962,0.302315,1.305013,0.265964,0.012367,,
Elevators,14939,18,0.391091,0.988241,,,0.390418,1.111907,0.388201,1.370416,...,,,0.381833,1.103708,0.380782,1.357116,0.394364,0.012302,,
Bike,15641,17,0.207342,0.969027,,,0.284496,1.147642,0.271696,1.429637,...,,,0.278347,1.166958,0.266256,1.453402,0.282281,0.01233,,
Kin40k,36000,8,0.242823,2.358266,,,0.235442,2.860872,0.197408,3.402015,...,,,0.229365,2.690426,0.192121,3.395707,0.201698,0.014128,,
Protein,41157,9,0.656481,2.508594,,,0.692929,2.97312,0.666736,3.765126,...,,,0.687825,3.115169,0.660697,3.722421,0.663185,0.015156,,
Keggdirected,43944,20,0.07885,2.911889,,,0.08636,3.268192,0.085367,4.027421,...,,,0.094027,3.170347,0.093826,4.067539,0.096246,0.014636,,
Slice,48150,385,0.051091,3.819275,,,0.139469,3.769376,0.12824,4.747617,...,,,0.137783,3.733067,0.125366,4.636225,0.454743,0.017507,,
Keggundirected,57247,27,0.116044,3.655165,,,0.130474,4.310255,0.130336,5.281487,...,,,0.131527,4.206492,0.131152,5.246654,0.13207,0.011946,,
3droad,391386,3,0.605115,24.565613,,,0.531515,28.881122,0.496384,36.203022,...,,,0.52814,28.818359,0.495213,35.465348,,,,
Song,270000,90,0.798795,19.215702,,,0.810078,20.334827,0.808013,24.967439,...,,,0.801442,20.607333,0.799972,25.050361,,,,


# Plot RMSE

In [6]:
def pm_var(df, model, seeds=(6535, 8830, 92357)):
    r"""Summarise per-seed results as LaTeX "mean $\pm$ std" strings.

    Args:
        df: DataFrame holding one column per seed, named ``f"{model}-{seed}"``.
        model: Column prefix shared by the per-seed columns.
        seeds: Seeds whose columns are aggregated; defaults to the three seeds
            used in this notebook.

    Returns:
        A Series of strings indexed like ``df``. Each entry is
        ``"<mean> $\pm$ <std>"`` with three decimals, ``"-"`` when no seed has
        a value for that row, and the bare mean when the standard deviation is
        undefined (fewer than two finite values in the row).
    """
    per_seed = df[[f'{model}-{seed}' for seed in seeds]]
    means = per_seed.mean(axis=1)
    stds = per_seed.std(axis=1)

    def render(mean, std):
        if pd.isna(mean):
            return "-"
        if pd.isna(std):
            # Sample std is NaN for a single observation; avoid printing "nan".
            return f"{mean:.3f}"
        # Raw f-string keeps the LaTeX backslash without an invalid "\p" escape.
        return rf"{mean:.3f} $\pm$ {std:.3f}"

    cells = [render(mean, std) for mean, std in zip(means, stds)]
    return pd.Series(cells, index=df.index, dtype=object)
# RMSE summary table: dataset sizes plus one "mean +/- std over seeds" column
# for each selected model/inducing-point configuration.
rmse_configs = ["soft-gp-512", "sv-gp-512", "svi-gp-1024"]
df_rmse = df[['N', 'D']].copy()
for model in rmse_configs:
    for frac in [0.9]:
        rmse_column = f'{model}-{frac}'
        df_rmse[rmse_column] = pm_var(df, rmse_column)

df_rmse

Unnamed: 0,N,D,soft-gp-512-0.9,sv-gp-512-0.9,svi-gp-1024-0.9
Pol,13500,26,0.195 $\pm$ 0.006,0.26 $\pm$ 0.007,0.3 $\pm$ 0.005
Elevators,14939,18,0.389 $\pm$ 0.01,0.401 $\pm$ 0.007,0.389 $\pm$ 0.009
Bike,15641,17,0.204 $\pm$ 0.006,0.284 $\pm$ 0.002,0.268 $\pm$ 0.003
Kin40k,36000,8,0.237 $\pm$ 0.008,0.205 $\pm$ 0.008,0.192 $\pm$ 0.006
Protein,41157,9,0.649 $\pm$ 0.01,0.66 $\pm$ 0.009,0.659 $\pm$ 0.008
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.089 $\pm$ 0.006,0.088 $\pm$ 0.005
Slice,48150,385,0.049 $\pm$ 0.003,0.479 $\pm$ 0.027,0.127 $\pm$ 0.001
Keggundirected,57247,27,0.115 $\pm$ 0.004,0.125 $\pm$ 0.007,0.127 $\pm$ 0.007
3droad,391386,3,0.605 $\pm$ 0.0,-,0.496 $\pm$ 0.001
Song,270000,90,0.793 $\pm$ 0.005,-,0.803 $\pm$ 0.004


In [7]:
# Pick the columns that go into the RMSE table and order the datasets by D.
rmse_table_columns = ["N", "D", "soft-gp-512-0.9", "sv-gp-512-0.9", "svi-gp-1024-0.9"]
df2 = df_rmse[rmse_table_columns].sort_values(by='D', ascending=True)
df2

Unnamed: 0,N,D,soft-gp-512-0.9,sv-gp-512-0.9,svi-gp-1024-0.9
3droad,391386,3,0.605 $\pm$ 0.0,-,0.496 $\pm$ 0.001
Kin40k,36000,8,0.237 $\pm$ 0.008,0.205 $\pm$ 0.008,0.192 $\pm$ 0.006
Protein,41157,9,0.649 $\pm$ 0.01,0.66 $\pm$ 0.009,0.659 $\pm$ 0.008
Houseelectric,1844352,11,0.064 $\pm$ 0.001,-,0.071 $\pm$ 0.0
Bike,15641,17,0.204 $\pm$ 0.006,0.284 $\pm$ 0.002,0.268 $\pm$ 0.003
Elevators,14939,18,0.389 $\pm$ 0.01,0.401 $\pm$ 0.007,0.389 $\pm$ 0.009
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.089 $\pm$ 0.006,0.088 $\pm$ 0.005
Pol,13500,26,0.195 $\pm$ 0.006,0.26 $\pm$ 0.007,0.3 $\pm$ 0.005
Keggundirected,57247,27,0.115 $\pm$ 0.004,0.125 $\pm$ 0.007,0.127 $\pm$ 0.007
Buzz,524925,77,0.254 $\pm$ 0.0,-,0.296 $\pm$ 0.001


In [8]:
# escape=False leaves the LaTeX markup inside the cells untouched.
three_decimals = "{:0.3f}".format
latex_table = df2.to_latex(index=True, escape=False, float_format=three_decimals)
print(latex_table)

\begin{tabular}{lrrlll}
\toprule
 & N & D & soft-gp-512-0.9 & sv-gp-512-0.9 & svi-gp-1024-0.9 \\
\midrule
3droad & 391386 & 3 & 0.605 $\pm$ 0.0 & - & 0.496 $\pm$ 0.001 \\
Kin40k & 36000 & 8 & 0.237 $\pm$ 0.008 & 0.205 $\pm$ 0.008 & 0.192 $\pm$ 0.006 \\
Protein & 41157 & 9 & 0.649 $\pm$ 0.01 & 0.66 $\pm$ 0.009 & 0.659 $\pm$ 0.008 \\
Houseelectric & 1844352 & 11 & 0.064 $\pm$ 0.001 & - & 0.071 $\pm$ 0.0 \\
Bike & 15641 & 17 & 0.204 $\pm$ 0.006 & 0.284 $\pm$ 0.002 & 0.268 $\pm$ 0.003 \\
Elevators & 14939 & 18 & 0.389 $\pm$ 0.01 & 0.401 $\pm$ 0.007 & 0.389 $\pm$ 0.009 \\
Keggdirected & 43944 & 20 & 0.081 $\pm$ 0.005 & 0.089 $\pm$ 0.006 & 0.088 $\pm$ 0.005 \\
Pol & 13500 & 26 & 0.195 $\pm$ 0.006 & 0.26 $\pm$ 0.007 & 0.3 $\pm$ 0.005 \\
Keggundirected & 57247 & 27 & 0.115 $\pm$ 0.004 & 0.125 $\pm$ 0.007 & 0.127 $\pm$ 0.007 \\
Buzz & 524925 & 77 & 0.254 $\pm$ 0.0 & - & 0.296 $\pm$ 0.001 \\
Song & 270000 & 90 & 0.793 $\pm$ 0.005 & - & 0.803 $\pm$ 0.004 \\
Slice & 48150 & 385 & 0.049 $\pm$ 0.003

# Plot Time

In [9]:
def pm_var_time(df, model, seeds=(6535, 8830, 92357)):
    r"""Summarise per-seed timings as LaTeX "mean $\pm$ std" strings.

    Args:
        df: DataFrame holding one column per seed, named
            ``f"time-{model}-{seed}"``.
        model: Configuration name that follows the ``"time-"`` prefix.
        seeds: Seeds whose columns are aggregated; defaults to the three seeds
            used in this notebook.

    Returns:
        A Series of strings indexed like ``df``. Each entry is
        ``"<mean> $\pm$ <std>"`` with three decimals, ``"-"`` when no seed has
        a value for that row, and the bare mean when the standard deviation is
        undefined (fewer than two finite values in the row).
    """
    per_seed = df[[f'time-{model}-{seed}' for seed in seeds]]
    means = per_seed.mean(axis=1)
    stds = per_seed.std(axis=1)

    def render(mean, std):
        if pd.isna(mean):
            return "-"
        if pd.isna(std):
            # Sample std is NaN for a single observation; avoid printing "nan".
            return f"{mean:.3f}"
        # Raw f-string keeps the LaTeX backslash without an invalid "\p" escape.
        return rf"{mean:.3f} $\pm$ {std:.3f}"

    cells = [render(mean, std) for mean, std in zip(means, stds)]
    return pd.Series(cells, index=df.index, dtype=object)

# NOTE(review): tmp is rebuilt here (now with an "exact-rep" entry) but is not
# read anywhere in this cell.
tmp = {
    "N": [int(np.floor(N * .9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
    "exact-rep": [e for _, _, _, e in uci_info],
}
# One row per dataset, one formatted timing column per model/inducing/frac setting.
df_time = pd.DataFrame(index=[name.capitalize().replace("_", "-") for name, _, _, _ in uci_info])
for model in models:
    for num_inducing in num_inducings:
        for frac in fracs:
            time_config = f'{model}-{num_inducing}-{frac}'
            col = pm_var_time(df, time_config)
            df_time[time_config] = col
df_time

Unnamed: 0,soft-gp-512-0.9,soft-gp-1024-0.9,svi-gp-512-0.9,svi-gp-1024-0.9,sv-gp-512-0.9,sv-gp-1024-0.9
Pol,0.84 $\pm$ 0.032,-,1.039 $\pm$ 0.037,1.304 $\pm$ 0.017,0.012 $\pm$ 0.0,-
Elevators,0.972 $\pm$ 0.07,-,1.11 $\pm$ 0.005,1.383 $\pm$ 0.033,0.013 $\pm$ 0.001,-
Bike,0.96 $\pm$ 0.008,-,1.224 $\pm$ 0.116,1.456 $\pm$ 0.028,0.012 $\pm$ 0.002,-
Kin40k,2.327 $\pm$ 0.149,-,2.792 $\pm$ 0.09,3.421 $\pm$ 0.038,0.014 $\pm$ 0.0,-
Protein,2.625 $\pm$ 0.15,-,3.03 $\pm$ 0.075,3.762 $\pm$ 0.037,0.013 $\pm$ 0.002,-
Keggdirected,2.86 $\pm$ 0.052,-,3.233 $\pm$ 0.054,4.051 $\pm$ 0.021,0.015 $\pm$ 0.0,-
Slice,3.869 $\pm$ 0.044,-,3.78 $\pm$ 0.054,4.715 $\pm$ 0.068,0.017 $\pm$ 0.0,-
Keggundirected,3.545 $\pm$ 0.098,-,4.269 $\pm$ 0.055,5.231 $\pm$ 0.06,0.014 $\pm$ 0.002,-
3droad,25.159 $\pm$ 0.667,-,29.043 $\pm$ 0.336,35.646 $\pm$ 0.492,-,-
Song,19.45 $\pm$ 0.203,-,20.45 $\pm$ 0.141,25.012 $\pm$ 0.042,-,-


In [10]:
# Timing table: dataset sizes joined with the selected timing columns, ordered by D.
tmp = dict(
    N=[int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    D=[D for _, _, D, _ in uci_info],
)
df_44 = pd.DataFrame(
    data=tmp,
    index=[name.capitalize().replace("_", "-") for name, _, _, _ in uci_info],
)
time_table_columns = ["soft-gp-512-0.9", "svi-gp-512-0.9", "svi-gp-1024-0.9"]
df5 = pd.concat([df_44, df_time[time_table_columns]], axis=1).sort_values(by='D', ascending=True)
df5

Unnamed: 0,N,D,soft-gp-512-0.9,svi-gp-512-0.9,svi-gp-1024-0.9
3droad,391386,3,25.159 $\pm$ 0.667,29.043 $\pm$ 0.336,35.646 $\pm$ 0.492
Kin40k,36000,8,2.327 $\pm$ 0.149,2.792 $\pm$ 0.09,3.421 $\pm$ 0.038
Protein,41157,9,2.625 $\pm$ 0.15,3.03 $\pm$ 0.075,3.762 $\pm$ 0.037
Houseelectric,1844352,11,118.355 $\pm$ 0.856,137.845 $\pm$ 1.217,168.461 $\pm$ 1.77
Bike,15641,17,0.96 $\pm$ 0.008,1.224 $\pm$ 0.116,1.456 $\pm$ 0.028
Elevators,14939,18,0.972 $\pm$ 0.07,1.11 $\pm$ 0.005,1.383 $\pm$ 0.033
Keggdirected,43944,20,2.86 $\pm$ 0.052,3.233 $\pm$ 0.054,4.051 $\pm$ 0.021
Pol,13500,26,0.84 $\pm$ 0.032,1.039 $\pm$ 0.037,1.304 $\pm$ 0.017
Keggundirected,57247,27,3.545 $\pm$ 0.098,4.269 $\pm$ 0.055,5.231 $\pm$ 0.06
Buzz,524925,77,37.166 $\pm$ 0.195,40.015 $\pm$ 0.695,49.109 $\pm$ 0.319


In [11]:
# escape=False leaves the LaTeX markup inside the cells untouched.
three_decimals = "{:0.3f}".format
latex_table = df5.to_latex(index=True, escape=False, float_format=three_decimals)
print(latex_table)

\begin{tabular}{lrrlll}
\toprule
 & N & D & soft-gp-512-0.9 & svi-gp-512-0.9 & svi-gp-1024-0.9 \\
\midrule
3droad & 391386 & 3 & 25.159 $\pm$ 0.667 & 29.043 $\pm$ 0.336 & 35.646 $\pm$ 0.492 \\
Kin40k & 36000 & 8 & 2.327 $\pm$ 0.149 & 2.792 $\pm$ 0.09 & 3.421 $\pm$ 0.038 \\
Protein & 41157 & 9 & 2.625 $\pm$ 0.15 & 3.03 $\pm$ 0.075 & 3.762 $\pm$ 0.037 \\
Houseelectric & 1844352 & 11 & 118.355 $\pm$ 0.856 & 137.845 $\pm$ 1.217 & 168.461 $\pm$ 1.77 \\
Bike & 15641 & 17 & 0.96 $\pm$ 0.008 & 1.224 $\pm$ 0.116 & 1.456 $\pm$ 0.028 \\
Elevators & 14939 & 18 & 0.972 $\pm$ 0.07 & 1.11 $\pm$ 0.005 & 1.383 $\pm$ 0.033 \\
Keggdirected & 43944 & 20 & 2.86 $\pm$ 0.052 & 3.233 $\pm$ 0.054 & 4.051 $\pm$ 0.021 \\
Pol & 13500 & 26 & 0.84 $\pm$ 0.032 & 1.039 $\pm$ 0.037 & 1.304 $\pm$ 0.017 \\
Keggundirected & 57247 & 27 & 3.545 $\pm$ 0.098 & 4.269 $\pm$ 0.055 & 5.231 $\pm$ 0.06 \\
Buzz & 524925 & 77 & 37.166 $\pm$ 0.195 & 40.015 $\pm$ 0.695 & 49.109 $\pm$ 0.319 \\
Song & 270000 & 90 & 19.45 $\pm$ 0.203 & 2