In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from os.path import exists
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

from data.get_uci import all_datasets
from analysis.util import fetch, init_uci_dict, get_uci_info

In [2]:
# Fetch the runs whose "group" field is "benchmark" (baseline configuration).
filters = dict(group="benchmark")
raw = fetch("soft-gp-3", filters)

100%|██████████| 141/141 [00:59<00:00,  2.35it/s]


In [3]:
# Fetch the runs whose "group" field is "benchmark-learnT".
filters = dict(group="benchmark-learnT")
raw_T = fetch("soft-gp-3", filters)

100%|██████████| 36/36 [00:15<00:00,  2.28it/s]


In [4]:
# Fetch the runs whose "group" field is "benchmark-learn-threshold".
filters = dict(group="benchmark-learn-threshold")
raw_threshold = fetch("soft-gp-3", filters)

100%|██████████| 36/36 [00:15<00:00,  2.28it/s]


In [5]:
# Load the UCI dataset table. Later cells iterate over it and unpack each
# entry as a 4-tuple, using the first three fields as (dataset name, N, D).
uci_info = get_uci_info()

In [6]:
# Index every soft-gp run history by (dataset, seed, use_T, use_threshold, model)
# so later cells can look up a specific configuration directly.
uci_dict = {}
for exp in raw + raw_T + raw_threshold:
    model = exp.config["model.name"]
    if model != "soft-gp":
        # Other model types are ignored; their configs are not inspected further.
        continue

    dataset = exp.config["dataset.name"]
    use_T = exp.config["model.use_T"]
    use_threshold = exp.config["model.use_threshold"]
    seed = exp.config["training.seed"]

    key = (dataset, seed, use_T, use_threshold, model)
    if key in uci_dict:
        # A second run with the same configuration: report it, then let the
        # later run overwrite the earlier one.
        print("FAIL", key)
    uci_dict[key] = exp.history

In [7]:
seeds = [6535, 8830, 92357]
num_inducings = [512, 1024]
KZZ = {}
all_bins = {}
fracs = [0.9]
models = ["soft-gp"]
# (use_T, use_threshold) combinations to tabulate.
flags = [(False, False), (True, False), (False, True)]

# Index into each history at which metrics are read (presumably the last
# training epoch -- TODO confirm against the training config).
FINAL_EPOCH = 49
# Multiplier applied to each dataset's N for the "N" column (presumably the
# training-split fraction -- TODO confirm).
TRAIN_FRAC = 0.9

tmp = {
    "N": [int(np.floor(N * TRAIN_FRAC)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
}

for seed in seeds:
    for model in models:
        for T, thresh in flags:
            xs = []  # test_rmse at FINAL_EPOCH, one entry per dataset
            ts = []  # mean epoch_time at FINAL_EPOCH, one entry per dataset
            for dataset, _, _, _ in uci_info:
                # Resolve both metrics *before* appending anything, so a failure
                # in either lookup yields exactly one NaN per list. Appending
                # inside the try could leave `xs` with two entries for one
                # dataset and break the column lengths of the DataFrame below.
                try:
                    history = uci_dict[(dataset, seed, T, thresh, model)]
                    final_rmse = history["test_rmse"][FINAL_EPOCH]
                    final_time = np.array(history["epoch_time"][FINAL_EPOCH]).mean()
                except Exception as e:
                    # Best-effort: a missing run or a run without FINAL_EPOCH
                    # becomes NaN in the table and is reported here.
                    final_rmse = np.nan
                    final_time = np.nan
                    print("Exception", e, model, dataset)
                xs += [final_rmse]
                ts += [final_time]

            tmp[f"{model}-{T}-{thresh}-{seed}"] = xs
            # `ts` is collected but intentionally not added to the table yet:
            # tmp[f"time-{model}-{T}-{thresh}-{seed}"] = ts

df = pd.DataFrame(data=tmp)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Exception 49 soft-gp houseelectric
Exception 49 soft-gp houseelectric
Exception 49 soft-gp houseelectric
Exception 49 soft-gp 3droad
Exception 49 soft-gp houseelectric


Unnamed: 0,N,D,soft-gp-False-False-6535,soft-gp-True-False-6535,soft-gp-False-True-6535,soft-gp-False-False-8830,soft-gp-True-False-8830,soft-gp-False-True-8830,soft-gp-False-False-92357,soft-gp-True-False-92357,soft-gp-False-True-92357
Pol,13500,26,0.189323,0.154656,0.210162,0.202581,0.151282,0.222078,0.195228,0.152485,0.220232
Elevators,14939,18,0.391446,0.389241,0.38968,0.397864,0.397449,0.393269,0.378086,0.385314,0.379188
Bike,15641,17,0.207511,0.163896,0.211401,0.207647,0.16617,0.213196,0.197947,0.162011,0.205756
Kin40k,36000,8,0.242929,0.177756,0.279728,0.228314,0.174724,0.26652,0.240502,0.185938,0.273934
Protein,41157,9,0.657446,0.654623,0.671399,0.638127,0.629582,0.657163,0.653156,0.653128,0.671617
Keggdirected,43944,20,0.07862,0.081057,0.079403,0.077221,0.079538,0.077823,0.086597,0.087371,0.086777
Slice,48150,385,0.019602,0.019829,0.034257,0.018154,0.031028,0.031403,0.027491,0.030558,0.0411
Keggundirected,57247,27,0.115445,0.11575,0.116253,0.111015,0.111366,0.111334,0.11798,0.118415,0.118495
3droad,391386,3,0.607416,0.758684,0.635184,0.607177,0.747717,0.627991,0.605562,,0.626977
Song,270000,90,0.798511,0.811831,0.79844,0.790525,0.802925,0.792173,0.79036,0.804182,0.792551


In [8]:
def pm_var(df, model, seeds=(6535, 8830, 92357)):
    r"""Format per-seed results as "mean $\pm$ std" strings, one per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain one column named ``f"{model}-{seed}"`` for every seed.
    model : str
        Column-name prefix identifying the configuration to summarise.
    seeds : iterable, optional
        Seed suffixes of the columns to aggregate. Defaults to the three
        seeds this function previously hard-coded.

    Returns
    -------
    pandas.Series
        Strings of the form ``"<mean> $\pm$ <std>"`` with both numbers rounded
        to 3 decimals (pandas' default ``mean``/``std`` along ``axis=1``).
        Rows where both statistics are NaN are rendered as ``"-"``.

    Raises
    ------
    KeyError
        If any of the expected per-seed columns is missing from ``df``.
    """
    # Select the per-seed columns once and derive both statistics from them.
    per_seed = df[[f"{model}-{seed}" for seed in seeds]]
    mean_text = per_seed.mean(axis=1).round(3).astype(str)
    std_text = per_seed.std(axis=1).round(3).astype(str)

    # Raw strings: "\p" is not a valid escape sequence in a normal literal.
    combined = mean_text + r" $\pm$ " + std_text
    return combined.str.replace(r"nan $\pm$ nan", "-", regex=False)
# Summary table: keep N and D from `df`, then add one "mean $\pm$ std" column
# per soft-gp flag combination (aggregated over seeds by pm_var).
df_rmse = df[["N", "D"]].copy()
for model in ("soft-gp-False-False", "soft-gp-False-True", "soft-gp-True-False"):
    df_rmse[model] = pm_var(df, model)

df_rmse

Unnamed: 0,N,D,soft-gp-False-False,soft-gp-False-True,soft-gp-True-False
Pol,13500,26,0.196 $\pm$ 0.007,0.217 $\pm$ 0.006,0.153 $\pm$ 0.002
Elevators,14939,18,0.389 $\pm$ 0.01,0.387 $\pm$ 0.007,0.391 $\pm$ 0.006
Bike,15641,17,0.204 $\pm$ 0.006,0.21 $\pm$ 0.004,0.164 $\pm$ 0.002
Kin40k,36000,8,0.237 $\pm$ 0.008,0.273 $\pm$ 0.007,0.179 $\pm$ 0.006
Protein,41157,9,0.65 $\pm$ 0.01,0.667 $\pm$ 0.008,0.646 $\pm$ 0.014
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.081 $\pm$ 0.005,0.083 $\pm$ 0.004
Slice,48150,385,0.022 $\pm$ 0.005,0.036 $\pm$ 0.005,0.027 $\pm$ 0.006
Keggundirected,57247,27,0.115 $\pm$ 0.004,0.115 $\pm$ 0.004,0.115 $\pm$ 0.004
3droad,391386,3,0.607 $\pm$ 0.001,0.63 $\pm$ 0.004,0.753 $\pm$ 0.008
Song,270000,90,0.793 $\pm$ 0.005,0.794 $\pm$ 0.004,0.806 $\pm$ 0.005


In [9]:
# Same columns as df_rmse, with rows ordered by ascending D.
df2 = df_rmse[
    ["N", "D", "soft-gp-False-False", "soft-gp-False-True", "soft-gp-True-False"]
].sort_values(by="D", ascending=True)
df2

Unnamed: 0,N,D,soft-gp-False-False,soft-gp-False-True,soft-gp-True-False
3droad,391386,3,0.607 $\pm$ 0.001,0.63 $\pm$ 0.004,0.753 $\pm$ 0.008
Kin40k,36000,8,0.237 $\pm$ 0.008,0.273 $\pm$ 0.007,0.179 $\pm$ 0.006
Protein,41157,9,0.65 $\pm$ 0.01,0.667 $\pm$ 0.008,0.646 $\pm$ 0.014
Houseelectric,1844352,11,0.055 $\pm$ 0.0,0.055 $\pm$ 0.0,-
Bike,15641,17,0.204 $\pm$ 0.006,0.21 $\pm$ 0.004,0.164 $\pm$ 0.002
Elevators,14939,18,0.389 $\pm$ 0.01,0.387 $\pm$ 0.007,0.391 $\pm$ 0.006
Keggdirected,43944,20,0.081 $\pm$ 0.005,0.081 $\pm$ 0.005,0.083 $\pm$ 0.004
Pol,13500,26,0.196 $\pm$ 0.007,0.217 $\pm$ 0.006,0.153 $\pm$ 0.002
Keggundirected,57247,27,0.115 $\pm$ 0.004,0.115 $\pm$ 0.004,0.115 $\pm$ 0.004
Buzz,524925,77,0.248 $\pm$ 0.001,0.25 $\pm$ 0.001,0.264 $\pm$ 0.001
