In [1]:
import pandas as pd
import wandb
from tqdm.notebook import tqdm
import pickle
from os.path import exists
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import math
from matplotlib.ticker import MaxNLocator

from data.get_uci import all_datasets
from analysis.util import fetch, init_uci_dict, get_uci_info

# Metadata returned by get_uci_info(); the cells below iterate over it and
# unpack every entry as a 4-tuple whose first three fields are used as
# (dataset name, N, D). The fourth field is not used in this notebook section.
uci_info = get_uci_info()

In [2]:
# Download every run of the "soft-gp-2" project that belongs to the
# "benchmark8" group.
filters = dict(group="benchmark8")
raw_orig = fetch("soft-gp-2", filters)

100%|██████████| 36/36 [00:15<00:00,  2.40it/s]


In [3]:
# Index the fetched runs by (dataset, seed, num_inducing, train_frac, model):
#   uci_dict2 -> logged history of the run
#   runs2     -> id of the run
runs2 = {}
uci_dict2 = {}
for exp in raw_orig:
    cfg = exp.config
    model = cfg["model.name"]
    dataset = cfg["dataset.name"]
    num_inducing = cfg["model.num_inducing"]
    dtype = cfg["model.dtype"]  # read from the config but not part of the key
    seed = cfg["training.seed"]
    train_frac = float(cfg["dataset.train_frac"])

    # "svi-gp" / "sv-gp" runs are only kept when they learned the noise term.
    skipped = model in ("svi-gp", "sv-gp") and not cfg["model.learn_noise"]
    if not skipped:
        key = (dataset, seed, num_inducing, train_frac, model)
        uci_dict2[key] = exp.history
        runs2[key] = exp.run.id

In [4]:
seeds = [6535, 8830, 92357]
num_inducings = [512, 1024]
fracs = [0.9]

# Row of each run history that is reported in the tables.
# NOTE(review): presumably the last epoch of a 50-epoch run -- confirm.
REPORT_STEP = 49
# Threshold labels of the logged "K_zz_bin_<label>" history columns; the same
# labels prefix the keys written to `all_bins`.
BIN_LABELS = ["0.0", "1e-20", "1e-10", "1e-05", "0.01", "0.5"]

# Column store for the results frame: one list per column, one entry per
# dataset in `uci_info` order.
UCI_INFO = {
    "N": [int(np.floor(N * 0.9)) for _, N, _, _ in uci_info],
    "D": [D for _, _, D, _ in uci_info],
}
KZZ = {}
all_bins = {}

models = ["soft-gp"]

for seed in seeds:
    for model in models:
        for num_inducing in num_inducings:
            for frac in fracs:
                xs = []
                ts = []
                K_zzs = []
                bins = {label: [] for label in BIN_LABELS}
                for dataset, _, _, _ in uci_info:
                    # Read every metric BEFORE touching the result lists. The
                    # previous version appended inside the `try`, so a failure
                    # half-way through left some lists one entry longer than
                    # the others once the NaN padding was added on top.
                    try:
                        history = uci_dict2[(dataset, seed, num_inducing, frac, model)]
                        rmse = float(history["test_rmse"][REPORT_STEP])
                        epoch_time = float(np.array(history["epoch_time"][REPORT_STEP]).mean())
                        k_zz = [history["K_zz"][i] for i in range(5)]
                        bin_values = {
                            label: history[f"K_zz_bin_{label}"][REPORT_STEP]
                            for label in BIN_LABELS
                        }
                    except Exception as e:
                        # Deliberately broad best-effort: a missing or
                        # incomplete run becomes a NaN row instead of aborting
                        # the whole table.
                        rmse = np.nan
                        epoch_time = np.nan
                        # As before, K_zzs receives no padding for a failed
                        # dataset, so it is not aligned with `uci_info`.
                        k_zz = []
                        bin_values = {label: np.nan for label in BIN_LABELS}
                        print("Exception", e, model, dataset)

                    xs.append(rmse)
                    ts.append(epoch_time)
                    K_zzs.extend(k_zz)
                    for label in BIN_LABELS:
                        bins[label].append(bin_values[label])

                UCI_INFO[f"{model}-{num_inducing}-{frac}-{seed}"] = xs
                # UCI_INFO[f"time-{model}-{num_inducing}-{frac}-{seed}"] = ts
                for label in BIN_LABELS:
                    all_bins[f"{label}-{model}-{num_inducing}-{frac}-{seed}"] = bins[label]
                KZZ[f"kzz-{model}-{num_inducing}-{frac}-{seed}"] = K_zzs
df = pd.DataFrame(data=UCI_INFO)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Exception ('pol', 6535, 1024, 0.9, 'soft-gp') soft-gp pol
Exception ('elevators', 6535, 1024, 0.9, 'soft-gp') soft-gp elevators
Exception ('bike', 6535, 1024, 0.9, 'soft-gp') soft-gp bike
Exception ('kin40k', 6535, 1024, 0.9, 'soft-gp') soft-gp kin40k
Exception ('protein', 6535, 1024, 0.9, 'soft-gp') soft-gp protein
Exception ('keggdirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggdirected
Exception ('slice', 6535, 1024, 0.9, 'soft-gp') soft-gp slice
Exception ('keggundirected', 6535, 1024, 0.9, 'soft-gp') soft-gp keggundirected
Exception ('3droad', 6535, 1024, 0.9, 'soft-gp') soft-gp 3droad
Exception ('song', 6535, 1024, 0.9, 'soft-gp') soft-gp song
Exception ('buzz', 6535, 1024, 0.9, 'soft-gp') soft-gp buzz
Exception ('houseelectric', 6535, 1024, 0.9, 'soft-gp') soft-gp houseelectric
Exception 49 soft-gp houseelectric
Exception ('pol', 8830, 1024, 0.9, 'soft-gp') soft-gp pol
Exception ('elevators', 8830, 1024, 0.9, 'soft-gp') soft-gp elevators
Exception ('bike', 8830, 1024, 0.9, 'so

Unnamed: 0,N,D,soft-gp-512-0.9-6535,soft-gp-1024-0.9-6535,soft-gp-512-0.9-8830,soft-gp-1024-0.9-8830,soft-gp-512-0.9-92357,soft-gp-1024-0.9-92357
Pol,13500,26,0.165535,,0.183801,,0.179955,
Elevators,14939,18,0.398479,,0.407659,,0.383482,
Bike,15641,17,0.205229,,0.21311,,0.206672,
Kin40k,36000,8,0.195797,,0.191214,,0.201318,
Protein,41157,9,0.640949,,0.61629,,0.623248,
Keggdirected,43944,20,0.080777,,0.076483,,0.087858,
Slice,48150,385,0.040312,,0.030302,,0.036665,
Keggundirected,57247,27,0.115206,,0.111449,,0.118556,
3droad,391386,3,0.572497,,0.574753,,0.575896,
Song,270000,90,0.802532,,0.796083,,0.795724,


In [5]:
# Download every run of the "soft-gp-2" project that belongs to the "noise2"
# group.
filters = dict(group="noise2")
raw = fetch("soft-gp-2", filters)

100%|██████████| 36/36 [00:12<00:00,  2.77it/s]


In [6]:
# Index the fetched runs by
# (dataset, seed, num_inducing, train_frac, model, learn_noise):
#   uci_dict -> logged history of the run
#   runs     -> id of the run
runs = {}
uci_dict = {}
for exp in raw:
    cfg = exp.config
    model = cfg["model.name"]
    dataset = cfg["dataset.name"]
    num_inducing = cfg["model.num_inducing"]
    dtype = cfg["model.dtype"]  # read from the config but not part of the key
    seed = cfg["training.seed"]
    learn_noise = cfg["model.learn_noise"]
    train_frac = float(cfg["dataset.train_frac"])

    key = (dataset, seed, num_inducing, train_frac, model, learn_noise)
    uci_dict[key] = exp.history
    runs[key] = exp.run.id

print(uci_dict.keys())

dict_keys([('houseelectric', 92357, 512, 0.9, 'soft-gp', True), ('houseelectric', 8830, 512, 0.9, 'soft-gp', True), ('houseelectric', 6535, 512, 0.9, 'soft-gp', True), ('buzz', 92357, 512, 0.9, 'soft-gp', True), ('buzz', 8830, 512, 0.9, 'soft-gp', True), ('buzz', 6535, 512, 0.9, 'soft-gp', True), ('song', 92357, 512, 0.9, 'soft-gp', True), ('song', 8830, 512, 0.9, 'soft-gp', True), ('song', 6535, 512, 0.9, 'soft-gp', True), ('3droad', 92357, 512, 0.9, 'soft-gp', True), ('3droad', 8830, 512, 0.9, 'soft-gp', True), ('3droad', 6535, 512, 0.9, 'soft-gp', True), ('keggundirected', 92357, 512, 0.9, 'soft-gp', True), ('keggundirected', 8830, 512, 0.9, 'soft-gp', True), ('keggundirected', 6535, 512, 0.9, 'soft-gp', True), ('slice', 92357, 512, 0.9, 'soft-gp', True), ('slice', 8830, 512, 0.9, 'soft-gp', True), ('slice', 6535, 512, 0.9, 'soft-gp', True), ('keggdirected', 92357, 512, 0.9, 'soft-gp', True), ('keggdirected', 8830, 512, 0.9, 'soft-gp', True), ('keggdirected', 6535, 512, 0.9, 'soft-g

In [7]:
# UCI_INFO, KZZ and all_bins are intentionally NOT re-initialised in this cell:
# the columns computed here are added to the dictionaries that already exist
# in the kernel, so that `df` shows both sets of runs side by side.

models = ["soft-gp"]
seeds = [6535, 8830, 92357]

# Row of each run history that is reported in the tables.
# NOTE(review): presumably the last epoch of a 50-epoch run -- confirm.
REPORT_STEP = 49
# Threshold labels of the logged "K_zz_bin_<label>" history columns; the same
# labels prefix the keys written to `all_bins`.
BIN_LABELS = ["0.0", "1e-20", "1e-10", "1e-05", "0.01", "0.5"]

# Only this single configuration is aggregated here.
model = "soft-gp"
frac = 0.9
num_inducing = 512

for ln in [True]:
    for seed in seeds:
        xs = []
        ts = []
        K_zzs = []
        bins = {label: [] for label in BIN_LABELS}
        for dataset, _, _, _ in uci_info:
            # Read every metric BEFORE touching the result lists. The previous
            # version appended inside the `try`, so a failure half-way through
            # left some lists one entry longer than the others once the NaN
            # padding was added on top.
            try:
                history = uci_dict[(dataset, seed, num_inducing, frac, model, ln)]
                rmse = float(history["test_rmse"][REPORT_STEP])
                epoch_time = float(np.array(history["epoch_time"][REPORT_STEP]).mean())
                k_zz = [history["K_zz"][i] for i in range(5)]
                bin_values = {
                    label: history[f"K_zz_bin_{label}"][REPORT_STEP]
                    for label in BIN_LABELS
                }
            except Exception as e:
                # Deliberately broad best-effort: a missing or incomplete run
                # becomes a NaN row instead of aborting the whole table.
                rmse = np.nan
                epoch_time = np.nan
                # As before, K_zzs receives no padding for a failed dataset,
                # so it is not aligned with `uci_info`.
                k_zz = []
                bin_values = {label: np.nan for label in BIN_LABELS}
                print("Exception", e, model, dataset)

            xs.append(rmse)
            ts.append(epoch_time)
            K_zzs.extend(k_zz)
            for label in BIN_LABELS:
                bins[label].append(bin_values[label])

        UCI_INFO[f"{model}-{num_inducing}-{frac}-{ln}-{seed}"] = xs
        # UCI_INFO[f"time-{model}-{num_inducing}-{frac}-{seed}-{ln}"] = ts
        for label in BIN_LABELS:
            all_bins[f"{label}-{model}-{num_inducing}-{frac}-{ln}-{seed}"] = bins[label]
        # NOTE(review): this key orders the fields as {ln}-{frac}, whereas the
        # UCI_INFO / all_bins keys above use {frac}-{ln}. Left unchanged
        # because code outside this cell may look the key up -- confirm.
        KZZ[f"kzz-{model}-{num_inducing}-{ln}-{frac}-{seed}"] = K_zzs
df = pd.DataFrame(data=UCI_INFO)
df.index = [name.capitalize().replace("_", "-") for name, _, _, _ in uci_info]
df

Unnamed: 0,N,D,soft-gp-512-0.9-6535,soft-gp-1024-0.9-6535,soft-gp-512-0.9-8830,soft-gp-1024-0.9-8830,soft-gp-512-0.9-92357,soft-gp-1024-0.9-92357,soft-gp-512-0.9-True-6535,soft-gp-512-0.9-True-8830,soft-gp-512-0.9-True-92357
Pol,13500,26,0.165535,,0.183801,,0.179955,,0.188124,0.200263,0.198621
Elevators,14939,18,0.398479,,0.407659,,0.383482,,0.402369,0.411259,0.395064
Bike,15641,17,0.205229,,0.21311,,0.206672,,0.262051,0.268186,0.271179
Kin40k,36000,8,0.195797,,0.191214,,0.201318,,0.295843,0.279393,0.288044
Protein,41157,9,0.640949,,0.61629,,0.623248,,0.70344,0.690762,0.698279
Keggdirected,43944,20,0.080777,,0.076483,,0.087858,,0.087506,0.082949,0.092178
Slice,48150,385,0.040312,,0.030302,,0.036665,,0.050486,0.041637,0.04622
Keggundirected,57247,27,0.115206,,0.111449,,0.118556,,0.119892,0.113855,0.122106
3droad,391386,3,0.572497,,0.574753,,0.575896,,0.775756,0.770694,0.771834
Song,270000,90,0.802532,,0.796083,,0.795724,,0.805732,0.800154,0.799112


In [8]:
def pm_var(df, model, seeds=(6535, 8830, 92357), decimals=3):
    r"""Summarise per-seed columns as LaTeX ``mean $\pm$ std`` strings.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one column per seed, named ``f"{model}-{seed}"``.
    model : str
        Column-name prefix shared by the per-seed columns.
    seeds : iterable of int, optional
        Seeds whose columns are aggregated. The default is the three seeds
        this function previously hard-coded.
    decimals : int, optional
        Number of decimals the mean and the standard deviation are rounded
        to. Both numbers are always printed with exactly this many decimals,
        so trailing zeros are kept (``0.010`` rather than ``0.01``).

    Returns
    -------
    pandas.Series
        One string per row of ``df``, indexed like ``df``. Rows for which
        both the mean and the standard deviation are NaN are rendered as
        ``"-"``.

    Raises
    ------
    KeyError
        If one of the per-seed columns is missing from ``df``.
    """
    columns = [f"{model}-{seed}" for seed in seeds]
    per_seed = df[columns]
    # Row-wise statistics across seeds; pandas skips NaN entries and uses the
    # sample standard deviation (ddof=1) by default.
    means = per_seed.mean(axis=1).round(decimals)
    stds = per_seed.std(axis=1).round(decimals)

    def _cell(mean, std):
        # No data at all for this row.
        if pd.isna(mean) and pd.isna(std):
            return "-"
        # "\\pm" spells the backslash explicitly; "\p" is not a valid Python
        # escape sequence and triggers a warning in a plain string literal.
        return f"{mean:.{decimals}f} $\\pm$ {std:.{decimals}f}"

    return pd.Series(
        [_cell(mean, std) for mean, std in zip(means, stds)],
        index=df.index,
        dtype=object,
    )
# Build the RMSE summary table: one "mean $\pm$ std" column per configuration,
# with and without the "-True" suffix in the column name. N and D are carried
# along only so that the rows can be ordered by D; they are dropped afterwards.
df_rmse = df[['N', 'D']].copy()
for model in models:
    for num_inducing in [512]:
        for frac in fracs:
            prefix = f'{model}-{num_inducing}-{frac}'
            for column in (prefix, f'{prefix}-{True}'):
                df_rmse[column] = pm_var(df, column)

df_rmse = df_rmse.sort_values(by='D').drop(columns=['N', 'D'])
print("RMSE")
df_rmse

RMSE


Unnamed: 0,soft-gp-512-0.9,soft-gp-512-0.9-True
3droad,0.574 $\pm$ 0.002,0.773 $\pm$ 0.003
Kin40k,0.196 $\pm$ 0.005,0.288 $\pm$ 0.008
Protein,0.627 $\pm$ 0.013,0.697 $\pm$ 0.006
Houseelectric,0.055 $\pm$ 0.0,0.057 $\pm$ 0.0
Bike,0.208 $\pm$ 0.004,0.267 $\pm$ 0.005
Elevators,0.397 $\pm$ 0.012,0.403 $\pm$ 0.008
Keggdirected,0.082 $\pm$ 0.006,0.088 $\pm$ 0.005
Pol,0.176 $\pm$ 0.01,0.196 $\pm$ 0.007
Keggundirected,0.115 $\pm$ 0.004,0.119 $\pm$ 0.004
Buzz,0.251 $\pm$ 0.001,0.266 $\pm$ 0.004


In [9]:
# Export the RMSE summary as a LaTeX tabular. escape=False leaves the "$\pm$"
# markup inside the cells untouched.
# NOTE(review): the df_rmse cells are strings produced by pm_var, so
# float_format most likely has no effect here -- confirm.
latex_options = {
    "index": True,
    "escape": False,
    "float_format": "{:0.3f}".format,
}
latex_table = df_rmse.to_latex(**latex_options)
print(latex_table)

\begin{tabular}{lll}
\toprule
 & soft-gp-512-0.9 & soft-gp-512-0.9-True \\
\midrule
3droad & 0.574 $\pm$ 0.002 & 0.773 $\pm$ 0.003 \\
Kin40k & 0.196 $\pm$ 0.005 & 0.288 $\pm$ 0.008 \\
Protein & 0.627 $\pm$ 0.013 & 0.697 $\pm$ 0.006 \\
Houseelectric & 0.055 $\pm$ 0.0 & 0.057 $\pm$ 0.0 \\
Bike & 0.208 $\pm$ 0.004 & 0.267 $\pm$ 0.005 \\
Elevators & 0.397 $\pm$ 0.012 & 0.403 $\pm$ 0.008 \\
Keggdirected & 0.082 $\pm$ 0.006 & 0.088 $\pm$ 0.005 \\
Pol & 0.176 $\pm$ 0.01 & 0.196 $\pm$ 0.007 \\
Keggundirected & 0.115 $\pm$ 0.004 & 0.119 $\pm$ 0.004 \\
Buzz & 0.251 $\pm$ 0.001 & 0.266 $\pm$ 0.004 \\
Song & 0.798 $\pm$ 0.004 & 0.802 $\pm$ 0.004 \\
Slice & 0.036 $\pm$ 0.005 & 0.046 $\pm$ 0.004 \\
\bottomrule
\end{tabular}

