In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from os.path import exists
import numpy as np
from matplotlib.ticker import MaxNLocator

from data.get_uci import all_datasets
from analysis.util import fetch, init_uci_dict, get_uci_info

In [2]:
filters = {
    "group": "benchmark8"
}
raw = fetch("soft-gp-2", filters)

100%|██████████| 36/36 [00:14<00:00,  2.47it/s]


In [3]:
filters = {
    "group": "benchmark6",
}
raw2 = fetch("soft-gp-2", filters)

100%|██████████| 224/224 [01:28<00:00,  2.53it/s]


In [4]:
uci_info = get_uci_info()

In [5]:
uci_dict = {}
for exp in raw:
    model = exp.config["model.name"]
    dataset = exp.config["dataset.name"]
    num_inducing = exp.config["model.num_inducing"]
    dtype = exp.config["model.dtype"]
    seed = exp.config["training.seed"]
    train_frac = float(exp.config["dataset.train_frac"])
    uci_dict[(dataset, seed, num_inducing, train_frac, model)] = exp.history

for exp in raw2:
    model = exp.config["model.name"]
    if model == "sv-gp" or model == "svi-gp":
        dataset = exp.config["dataset.name"]
        num_inducing = exp.config["model.num_inducing"]
        dtype = exp.config["model.dtype"]
        seed = exp.config["training.seed"]
        train_frac = float(exp.config["dataset.train_frac"])
        uci_dict[(dataset, seed, num_inducing, train_frac, model)] = exp.history

print(uci_dict.keys())

dict_keys([('houseelectric', 92357, 512, 0.9, 'soft-gp'), ('buzz', 92357, 512, 0.9, 'soft-gp'), ('song', 92357, 512, 0.9, 'soft-gp'), ('3droad', 92357, 512, 0.9, 'soft-gp'), ('keggundirected', 92357, 512, 0.9, 'soft-gp'), ('slice', 92357, 512, 0.9, 'soft-gp'), ('keggdirected', 92357, 512, 0.9, 'soft-gp'), ('protein', 92357, 512, 0.9, 'soft-gp'), ('kin40k', 92357, 512, 0.9, 'soft-gp'), ('bike', 92357, 512, 0.9, 'soft-gp'), ('elevators', 92357, 512, 0.9, 'soft-gp'), ('pol', 92357, 512, 0.9, 'soft-gp'), ('houseelectric', 8830, 512, 0.9, 'soft-gp'), ('buzz', 8830, 512, 0.9, 'soft-gp'), ('song', 8830, 512, 0.9, 'soft-gp'), ('3droad', 8830, 512, 0.9, 'soft-gp'), ('keggundirected', 8830, 512, 0.9, 'soft-gp'), ('slice', 8830, 512, 0.9, 'soft-gp'), ('keggdirected', 8830, 512, 0.9, 'soft-gp'), ('protein', 8830, 512, 0.9, 'soft-gp'), ('kin40k', 8830, 512, 0.9, 'soft-gp'), ('bike', 8830, 512, 0.9, 'soft-gp'), ('elevators', 8830, 512, 0.9, 'soft-gp'), ('pol', 8830, 512, 0.9, 'soft-gp'), ('houseelec

In [6]:
seeds = [6535, 8830, 92357]
num_inducings = [512, 1024]
fracs = [0.9]
# fracs = [0.44, 0.89]
tmp = {
    "N": [int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
    "exact-rep": [e for _, _, _, e in uci_info],
}
models = ["soft-gp", "svi-gp", "sv-gp"]

# models = ["sv-gp"]
for seed in seeds:
    for model in models:
        for num_inducing in num_inducings:
            for frac in fracs:
                xs = []
                ts = []
                for dataset, _, _, _ in uci_info:
                    try:
                        xs += [uci_dict[(dataset, seed, num_inducing, frac, model)]["test_rmse"][49]]
                        ts += [np.array(uci_dict[(dataset, seed, num_inducing, frac, model)]["epoch_time"][49]).mean()]
                    except Exception as e:
                        xs += [np.nan]
                        ts += [np.nan]
                        print("Exception", e, model, dataset)

                tmp[f"{model}-{num_inducing}-{frac}-{seed}"] = xs
                tmp[f"time-{model}-{num_inducing}-{frac}-{seed}"] = ts
df = pd.DataFrame(data=tmp)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Exception ('pol', 6535, 1024, 0.9, 'soft-gp') soft-gp pol
Exception ('elevators', 6535, 1024, 0.9, 'soft-gp') soft-gp elevators
Exception ('bike', 6535, 1024, 0.9, 'soft-gp') soft-gp bike
Exception ('kin40k', 6535, 1024, 0.9, 'soft-gp') soft-gp kin40k
Exception ('protein', 6535, 1024, 0.9, 'soft-gp') soft-gp protein
Exception ('keggdirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggdirected
Exception ('slice', 6535, 1024, 0.9, 'soft-gp') soft-gp slice
Exception ('keggundirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggundirected
Exception ('3droad', 6535, 1024, 0.9, 'soft-gp') soft-gp 3droad
Exception ('song', 6535, 1024, 0.9, 'soft-gp') soft-gp song
Exception ('buzz', 6535, 1024, 0.9, 'soft-gp') soft-gp buzz
Exception ('houseelectric', 6535, 1024, 0.9, 'soft-gp') soft-gp houseelectric
Exception 'test_rmse' svi-gp kin40k
Exception 'test_rmse' sv-gp 3droad
Exception 'test_rmse' sv-gp song
Exception 'test_rmse' sv-gp buzz
Exception 'test_rmse' sv-gp houseelectric
Exception 'test_rmse' s

Unnamed: 0,N,D,exact-rep,soft-gp-512-0.9-6535,time-soft-gp-512-0.9-6535,soft-gp-1024-0.9-6535,time-soft-gp-1024-0.9-6535,svi-gp-512-0.9-6535,time-svi-gp-512-0.9-6535,svi-gp-1024-0.9-6535,...,soft-gp-1024-0.9-92357,time-soft-gp-1024-0.9-92357,svi-gp-512-0.9-92357,time-svi-gp-512-0.9-92357,svi-gp-1024-0.9-92357,time-svi-gp-1024-0.9-92357,sv-gp-512-0.9-92357,time-sv-gp-512-0.9-92357,sv-gp-1024-0.9-92357,time-sv-gp-1024-0.9-92357
Pol,13500,26,0.151,0.165535,0.957758,,,0.31135,1.296715,0.293881,...,,,0.321628,1.327214,0.302314,1.45404,0.265855,0.009525,0.25221,0.018156
Elevators,14939,18,0.394,0.398479,1.152537,,,0.389946,1.262276,0.38831,...,,,0.381559,1.255358,0.380791,1.456043,0.394617,0.010938,0.390336,0.018376
Bike,15641,17,0.22,0.205229,1.088694,,,0.284535,1.332126,0.271703,...,,,,,0.266835,1.513914,0.282263,0.010315,0.269703,0.015463
Kin40k,36000,8,0.099,0.195797,2.480578,,,,,0.197408,...,,,0.229365,3.155264,0.19212,3.458676,0.201698,0.011665,0.171754,0.019259
Protein,41157,9,0.536,0.640949,2.702377,,,0.692984,3.017151,0.666799,...,,,0.68783,3.413769,0.661023,4.09886,0.663547,0.012055,0.630435,0.019942
Keggdirected,43944,20,0.086,0.080777,2.808367,,,0.086325,3.651873,0.085391,...,,,0.094027,3.784911,0.093862,4.30415,0.118892,0.010079,0.147462,0.020524
Slice,48150,385,0.262,0.040312,3.136925,,,0.139487,4.33201,0.128201,...,,,0.137776,4.466411,0.125384,5.045128,0.454715,0.014877,0.408396,0.024747
Keggundirected,57247,27,0.118,0.115206,3.636164,,,0.130467,5.449997,0.1303,...,,,0.131526,4.827819,0.131146,5.629603,0.136855,0.013922,0.12869,0.021803
3droad,391386,3,0.101,0.572497,24.096513,,,0.531515,33.051935,0.496384,...,,,0.52814,33.92653,0.495213,37.835541,,,,
Song,270000,90,0.807,0.802532,17.633134,,,0.810079,23.424486,0.808013,...,,,0.801442,23.427389,0.799972,26.567973,,,,


In [7]:
def pm_var(df, model):
    print(df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].mean(axis=1))
    m = df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].mean(axis=1).round(3).astype(str)
    v = df[[f'{model}-6535', f'{model}-8830', f'{model}-92357']].std(axis=1).round(3).astype(str).apply(lambda x: f" $\pm$ {x}")
    return (m + v).apply(lambda x: x.replace("nan $\pm$ nan", "-"))
df_rmse = pd.DataFrame()
df_rmse[['N', 'D']] = df[['N', 'D']]
for model in models:
    for num_inducing in num_inducings:
        for frac in [0.9]:
            df_rmse[f'{model}-{num_inducing}-{frac}'] = pm_var(df, f'{model}-{num_inducing}-{frac}')
            # df_rmse['svi-gp'] = pm_var(df, "svi-gp")
            # df_rmse['soft-gp'] = pm_var(df, "soft-gp")

df_rmse

Pol               0.176430
Elevators         0.396540
Bike              0.208337
Kin40k            0.196110
Protein           0.626829
Keggdirected      0.081706
Slice             0.035760
Keggundirected    0.115070
3droad            0.574382
Song              0.798113
Buzz              0.251085
Houseelectric     0.054674
dtype: float64
Pol              NaN
Elevators        NaN
Bike             NaN
Kin40k           NaN
Protein          NaN
Keggdirected     NaN
Slice            NaN
Keggundirected   NaN
3droad           NaN
Song             NaN
Buzz             NaN
Houseelectric    NaN
dtype: float64
Pol               0.318078
Elevators         0.390891
Bike              0.281180
Kin40k            0.226183
Protein           0.685942
Keggdirected      0.088600
Slice             0.137931
Keggundirected    0.126844
3droad            0.530453
Song              0.805033
Buzz              0.301734
Houseelectric     0.074711
dtype: float64
Pol               0.299728
Elevators         0.389187
B

Unnamed: 0,N,D,soft-gp-512-0.9,soft-gp-1024-0.9,svi-gp-512-0.9,svi-gp-1024-0.9,sv-gp-512-0.9,sv-gp-1024-0.9
Pol,13500,26,0.176 $\pm$ 0.01,-,0.318 $\pm$ 0.006,0.3 $\pm$ 0.005,0.26 $\pm$ 0.007,0.245 $\pm$ 0.007
Elevators,14939,18,0.397 $\pm$ 0.012,-,0.391 $\pm$ 0.01,0.389 $\pm$ 0.009,0.401 $\pm$ 0.007,0.397 $\pm$ 0.007
Bike,15641,17,0.208 $\pm$ 0.004,-,0.281 $\pm$ 0.005,0.268 $\pm$ 0.003,0.284 $\pm$ 0.001,0.271 $\pm$ 0.001
Kin40k,36000,8,0.196 $\pm$ 0.005,-,0.226 $\pm$ 0.004,0.192 $\pm$ 0.006,0.205 $\pm$ 0.008,0.171 $\pm$ 0.005
Protein,41157,9,0.627 $\pm$ 0.013,-,0.686 $\pm$ 0.008,0.66 $\pm$ 0.008,0.66 $\pm$ 0.01,0.632 $\pm$ 0.009
Keggdirected,43944,20,0.082 $\pm$ 0.006,-,0.089 $\pm$ 0.005,0.088 $\pm$ 0.005,0.102 $\pm$ 0.016,0.12 $\pm$ 0.024
Slice,48150,385,0.036 $\pm$ 0.005,-,0.138 $\pm$ 0.001,0.127 $\pm$ 0.001,0.479 $\pm$ 0.027,0.424 $\pm$ 0.014
Keggundirected,57247,27,0.115 $\pm$ 0.004,-,0.127 $\pm$ 0.007,0.127 $\pm$ 0.007,0.126 $\pm$ 0.011,0.125 $\pm$ 0.004
3droad,391386,3,0.574 $\pm$ 0.002,-,0.53 $\pm$ 0.002,0.496 $\pm$ 0.001,-,-
Song,270000,90,0.798 $\pm$ 0.004,-,0.805 $\pm$ 0.004,0.803 $\pm$ 0.004,-,-


In [8]:
df2 = df_rmse[["N", "D", "soft-gp-512-0.9", "sv-gp-512-0.9", "svi-gp-1024-0.9"]]
df2 = df2.sort_values(by=['D'], ascending=[True])
df2

Unnamed: 0,N,D,soft-gp-512-0.9,sv-gp-512-0.9,svi-gp-1024-0.9
3droad,391386,3,0.574 $\pm$ 0.002,-,0.496 $\pm$ 0.001
Kin40k,36000,8,0.196 $\pm$ 0.005,0.205 $\pm$ 0.008,0.192 $\pm$ 0.006
Protein,41157,9,0.627 $\pm$ 0.013,0.66 $\pm$ 0.01,0.66 $\pm$ 0.008
Houseelectric,1844352,11,0.055 $\pm$ 0.0,-,0.071 $\pm$ 0.0
Bike,15641,17,0.208 $\pm$ 0.004,0.284 $\pm$ 0.001,0.268 $\pm$ 0.003
Elevators,14939,18,0.397 $\pm$ 0.012,0.401 $\pm$ 0.007,0.389 $\pm$ 0.009
Keggdirected,43944,20,0.082 $\pm$ 0.006,0.102 $\pm$ 0.016,0.088 $\pm$ 0.005
Pol,13500,26,0.176 $\pm$ 0.01,0.26 $\pm$ 0.007,0.3 $\pm$ 0.005
Keggundirected,57247,27,0.115 $\pm$ 0.004,0.126 $\pm$ 0.011,0.127 $\pm$ 0.007
Buzz,524925,77,0.251 $\pm$ 0.001,-,0.296 $\pm$ 0.001


In [9]:
latex_table = df2.to_latex(
    index=True,
    escape=False,
    float_format="{:0.3f}".format,
)
print(latex_table)

\begin{tabular}{lrrlll}
\toprule
 & N & D & soft-gp-512-0.9 & sv-gp-512-0.9 & svi-gp-1024-0.9 \\
\midrule
3droad & 391386 & 3 & 0.574 $\pm$ 0.002 & - & 0.496 $\pm$ 0.001 \\
Kin40k & 36000 & 8 & 0.196 $\pm$ 0.005 & 0.205 $\pm$ 0.008 & 0.192 $\pm$ 0.006 \\
Protein & 41157 & 9 & 0.627 $\pm$ 0.013 & 0.66 $\pm$ 0.01 & 0.66 $\pm$ 0.008 \\
Houseelectric & 1844352 & 11 & 0.055 $\pm$ 0.0 & - & 0.071 $\pm$ 0.0 \\
Bike & 15641 & 17 & 0.208 $\pm$ 0.004 & 0.284 $\pm$ 0.001 & 0.268 $\pm$ 0.003 \\
Elevators & 14939 & 18 & 0.397 $\pm$ 0.012 & 0.401 $\pm$ 0.007 & 0.389 $\pm$ 0.009 \\
Keggdirected & 43944 & 20 & 0.082 $\pm$ 0.006 & 0.102 $\pm$ 0.016 & 0.088 $\pm$ 0.005 \\
Pol & 13500 & 26 & 0.176 $\pm$ 0.01 & 0.26 $\pm$ 0.007 & 0.3 $\pm$ 0.005 \\
Keggundirected & 57247 & 27 & 0.115 $\pm$ 0.004 & 0.126 $\pm$ 0.011 & 0.127 $\pm$ 0.007 \\
Buzz & 524925 & 77 & 0.251 $\pm$ 0.001 & - & 0.296 $\pm$ 0.001 \\
Song & 270000 & 90 & 0.798 $\pm$ 0.004 & - & 0.803 $\pm$ 0.004 \\
Slice & 48150 & 385 & 0.036 $\pm$ 0.00

In [10]:
def pm_var_time(df, model):
    m = df[[f'time-{model}-6535', f'time-{model}-8830', f'time-{model}-92357']].mean(axis=1).round(3).astype(str)
    v = df[[f'time-{model}-6535', f'time-{model}-8830', f'time-{model}-92357']].std(axis=1).round(3).astype(str).apply(lambda x: f" $\pm$ {x}")
    return (m + v).apply(lambda x: x.replace("nan $\pm$ nan", "-"))

tmp = {
    "N": [int(np.floor(N * .9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
    "exact-rep": [e for _, _, _, e in uci_info],
}
df_time = pd.DataFrame()
df_time.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
for model in models:
    for num_inducing in num_inducings:
        for frac in fracs:
            col = pm_var_time(df, f'{model}-{num_inducing}-{frac}')
            df_time[f'{model}-{num_inducing}-{frac}'] = col
df_time

Unnamed: 0,soft-gp-512-0.9,soft-gp-1024-0.9,svi-gp-512-0.9,svi-gp-1024-0.9,sv-gp-512-0.9,sv-gp-1024-0.9
Pol,0.949 $\pm$ 0.011,-,1.305 $\pm$ 0.019,1.476 $\pm$ 0.144,0.009 $\pm$ 0.001,0.017 $\pm$ 0.002
Elevators,1.07 $\pm$ 0.073,-,1.268 $\pm$ 0.017,1.452 $\pm$ 0.006,0.013 $\pm$ 0.001,0.017 $\pm$ 0.003
Bike,1.087 $\pm$ 0.004,-,1.243 $\pm$ 0.126,1.511 $\pm$ 0.01,0.011 $\pm$ 0.001,0.015 $\pm$ 0.0
Kin40k,2.495 $\pm$ 0.021,-,3.139 $\pm$ 0.023,3.545 $\pm$ 0.077,0.012 $\pm$ 0.002,0.019 $\pm$ 0.0
Protein,2.713 $\pm$ 0.01,-,3.301 $\pm$ 0.247,4.137 $\pm$ 0.034,0.012 $\pm$ 0.001,0.022 $\pm$ 0.002
Keggdirected,2.801 $\pm$ 0.024,-,3.713 $\pm$ 0.067,4.344 $\pm$ 0.152,0.012 $\pm$ 0.003,0.021 $\pm$ 0.0
Slice,3.172 $\pm$ 0.044,-,4.425 $\pm$ 0.081,5.008 $\pm$ 0.073,0.015 $\pm$ 0.001,0.028 $\pm$ 0.004
Keggundirected,3.678 $\pm$ 0.047,-,5.04 $\pm$ 0.355,5.572 $\pm$ 0.054,0.014 $\pm$ 0.002,0.022 $\pm$ 0.001
3droad,24.272 $\pm$ 0.154,-,33.428 $\pm$ 0.45,37.903 $\pm$ 0.059,-,-
Song,17.451 $\pm$ 0.261,-,23.364 $\pm$ 0.107,26.694 $\pm$ 0.111,-,-


In [11]:
tmp = {
    "N": [int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
}
df_44 = pd.DataFrame(data=tmp)
df_44.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df5 = pd.concat([df_44, df_time[["soft-gp-512-0.9", "svi-gp-512-0.9", "svi-gp-1024-0.9"]]], axis=1)
df5 = df5.sort_values(by=['D'], ascending=[True])
df5

Unnamed: 0,N,D,soft-gp-512-0.9,svi-gp-512-0.9,svi-gp-1024-0.9
3droad,391386,3,24.272 $\pm$ 0.154,33.428 $\pm$ 0.45,37.903 $\pm$ 0.059
Kin40k,36000,8,2.495 $\pm$ 0.021,3.139 $\pm$ 0.023,3.545 $\pm$ 0.077
Protein,41157,9,2.713 $\pm$ 0.01,3.301 $\pm$ 0.247,4.137 $\pm$ 0.034
Houseelectric,1844352,11,117.933 $\pm$ 1.963,155.967 $\pm$ 2.957,176.823 $\pm$ 6.568
Bike,15641,17,1.087 $\pm$ 0.004,1.243 $\pm$ 0.126,1.511 $\pm$ 0.01
Elevators,14939,18,1.07 $\pm$ 0.073,1.268 $\pm$ 0.017,1.452 $\pm$ 0.006
Keggdirected,43944,20,2.801 $\pm$ 0.024,3.713 $\pm$ 0.067,4.344 $\pm$ 0.152
Pol,13500,26,0.949 $\pm$ 0.011,1.305 $\pm$ 0.019,1.476 $\pm$ 0.144
Keggundirected,57247,27,3.678 $\pm$ 0.047,5.04 $\pm$ 0.355,5.572 $\pm$ 0.054
Buzz,524925,77,34.394 $\pm$ 0.381,49.494 $\pm$ 7.615,52.201 $\pm$ 0.558


In [12]:
latex_table = df5.to_latex(
    index=True,
    escape=False,
    float_format="{:0.3f}".format,
)
print(latex_table)

\begin{tabular}{lrrlll}
\toprule
 & N & D & soft-gp-512-0.9 & svi-gp-512-0.9 & svi-gp-1024-0.9 \\
\midrule
3droad & 391386 & 3 & 24.272 $\pm$ 0.154 & 33.428 $\pm$ 0.45 & 37.903 $\pm$ 0.059 \\
Kin40k & 36000 & 8 & 2.495 $\pm$ 0.021 & 3.139 $\pm$ 0.023 & 3.545 $\pm$ 0.077 \\
Protein & 41157 & 9 & 2.713 $\pm$ 0.01 & 3.301 $\pm$ 0.247 & 4.137 $\pm$ 0.034 \\
Houseelectric & 1844352 & 11 & 117.933 $\pm$ 1.963 & 155.967 $\pm$ 2.957 & 176.823 $\pm$ 6.568 \\
Bike & 15641 & 17 & 1.087 $\pm$ 0.004 & 1.243 $\pm$ 0.126 & 1.511 $\pm$ 0.01 \\
Elevators & 14939 & 18 & 1.07 $\pm$ 0.073 & 1.268 $\pm$ 0.017 & 1.452 $\pm$ 0.006 \\
Keggdirected & 43944 & 20 & 2.801 $\pm$ 0.024 & 3.713 $\pm$ 0.067 & 4.344 $\pm$ 0.152 \\
Pol & 13500 & 26 & 0.949 $\pm$ 0.011 & 1.305 $\pm$ 0.019 & 1.476 $\pm$ 0.144 \\
Keggundirected & 57247 & 27 & 3.678 $\pm$ 0.047 & 5.04 $\pm$ 0.355 & 5.572 $\pm$ 0.054 \\
Buzz & 524925 & 77 & 34.394 $\pm$ 0.381 & 49.494 $\pm$ 7.615 & 52.201 $\pm$ 0.558 \\
Song & 270000 & 90 & 17.451 $\pm$ 0.2