In [14]:
%load_ext autoreload
%autoreload 2
from scalable_gps.wandb_utils import load_runs_from_regex
import numpy as np
import os

def get_splits(dataset):
    """Return the list of split indices to use for `dataset`.

    '3droad' uses splits 0, 1, 2 and 4 (split 3 is left out),
    'houseelectric' uses splits 0, 1 and 2, and every other dataset
    name uses splits 0 through 4. A new list is built on every call.
    """
    if dataset == 'houseelectric':
        return list(range(3))
    if dataset == '3droad':
        # Everything in 0..4 except split 3.
        return [split for split in range(5) if split != 3]
    return list(range(5))

# Datasets that appear in the table, in column order.
# 'kin40k' is intentionally left out of this list.
datasets = [
    'pol', 'elevators', 'bike', 'protein', 'keggdirected',
    '3droad', 'song', 'buzz', 'houseelectric',
]

# Models whose runs are downloaded.
models = ['sgd', 'cg', 'precondcg', 'vi']

# Run-config fields and logged metrics requested for every run.
config_keys = ['model_name', 'dataset_config.split', 'override_noise_scale']
metric_keys = ['wall_clock_time', 'normalised_test_rmse']

# Start from the on-disk cache when it exists, otherwise from an empty dict.
# Caution: allow_pickle=True unpickles arbitrary Python objects, so only load
# cache files that were written by this notebook.
rmse_dict_path = "./table_rmse.npy"
rmse_dict = (
    np.load(rmse_dict_path, allow_pickle=True).item()
    if os.path.isfile(rmse_dict_path)
    else dict()
)

# Fill rmse_dict[dataset][model][metric] with one (n_splits, 2) array per
# metric. (dataset, model) pairs already present in rmse_dict are skipped.
for dataset in datasets:
    if dataset not in rmse_dict:
        rmse_dict[dataset] = {}

    splits = get_splits(dataset)
    n_splits = len(splits)
    # String form of the split list with ", " replaced by "|", e.g. "[0|1|2|4]".
    split_regex = str(splits).replace(", ", "|")

    for model in models:
        if model in rmse_dict[dataset]:
            print(f"rmse for {dataset}, {model} already exists")
            continue

        # Pre-fill with +inf so entries no run writes to stay recognisable.
        rmse_dict[dataset][model] = {
            metric: np.full((n_splits, 2), np.inf) for metric in metric_keys
        }

        regex = f"^final_{dataset}_{model}_{split_regex}.*"

        print(f"Downloading results for {dataset}, {model}")
        for metric in metric_keys:
            configs_and_metrics = load_runs_from_regex(regex, config_keys=config_keys, metric_keys=[metric])
            results = rmse_dict[dataset][model][metric]

            for configs, metrics in configs_and_metrics:
                # Row = position of the run's split within this dataset's split list.
                split = splits.index(configs['dataset_config.split'])
                assert model == configs['model_name']
                if model == 'precondcg' and metric == 'wall_clock_time':
                    # Subtract the first logged time from the last one (in place).
                    metrics['wall_clock_time'][-1] -= metrics['wall_clock_time'][0]
                # Column 0: override_noise_scale == -1; column 1: any other value.
                idx = 0 if configs['override_noise_scale'] == -1 else 1
                results[split, idx] = metrics[metric][-1]
        # Writing the cache back to disk is currently disabled:
        # np.save(rmse_dict_path, rmse_dict)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
rmse for pol, sgd already exists
rmse for pol, cg already exists
rmse for pol, precondcg already exists
rmse for pol, vi already exists
rmse for elevators, sgd already exists
rmse for elevators, cg already exists
rmse for elevators, precondcg already exists
rmse for elevators, vi already exists
rmse for bike, sgd already exists
rmse for bike, cg already exists
rmse for bike, precondcg already exists
rmse for bike, vi already exists
rmse for protein, sgd already exists
rmse for protein, cg already exists
rmse for protein, precondcg already exists
rmse for protein, vi already exists
rmse for keggdirected, sgd already exists
rmse for keggdirected, cg already exists
rmse for keggdirected, precondcg already exists
rmse for keggdirected, vi already exists
rmse for 3droad, sgd already exists
rmse for 3droad, cg already exists
rmse for 3droad, precondcg already exists
rmse for 3droad, vi already exists
rmse

In [16]:
%load_ext autoreload
%autoreload 2
from scalable_gps.wandb_utils import load_runs_from_regex
import numpy as np
import os

def get_splits(dataset):
    """Return the split indices used for `dataset` as a fresh list.

    '3droad' -> [0, 1, 2, 4]; 'houseelectric' -> [0, 1, 2];
    any other name -> [0, 1, 2, 3, 4].
    """
    all_splits = [0, 1, 2, 3, 4]
    if dataset == '3droad':
        # Split 3 is not used for this dataset.
        all_splits.remove(3)
    elif dataset == 'houseelectric':
        # Only the first three splits are used for this dataset.
        all_splits = all_splits[:3]
    return all_splits

# Datasets that appear in the table, in column order.
# 'kin40k' is intentionally left out of this list.
datasets = [
    'pol', 'elevators', 'bike', 'protein', 'keggdirected',
    '3droad', 'song', 'buzz', 'houseelectric',
]

# Models whose runs are downloaded.
models = ['sgd', 'cg', 'precondcg', 'vi']

# Run-config fields and the single logged metric requested for every run.
config_keys = ['model_name', 'dataset_config.split']
metric_keys = ['normalised_test_llh']

# Start from the on-disk cache when it exists, otherwise from an empty dict.
# Caution: allow_pickle=True unpickles arbitrary Python objects, so only load
# cache files that were written by this notebook.
llh_dict_path = "./table_llh.npy"
llh_dict = (
    np.load(llh_dict_path, allow_pickle=True).item()
    if os.path.isfile(llh_dict_path)
    else dict()
)

# Fill llh_dict[dataset][model][metric] with one length-n_splits array per
# metric. (dataset, model) pairs already present in llh_dict are skipped.
for dataset in datasets:
    if dataset not in llh_dict:
        llh_dict[dataset] = dict()

    splits = get_splits(dataset)
    # String form of the split list with ", " replaced by "|", e.g. "[0|1|2|4]".
    # NOTE(review): in standard regex syntax this is a character class (one
    # character from the set, '|' included), not an alternation. That matches
    # single-digit splits only -- confirm against load_runs_from_regex.
    split_regex = f"{splits}".replace(", ", "|")
    n_splits = len(splits)

    for model in models:
        if model in llh_dict[dataset]:
            print(f"llh for {dataset}, {model} already exists")
            continue

        llh_dict[dataset][model] = dict()

        # Pre-fill with -inf so splits no run writes to stay recognisable.
        for metric in metric_keys:
            llh_dict[dataset][model][metric] = np.full((n_splits,), -np.inf)
        regex = f"^samples_final_{dataset}_{model}_{split_regex}$"

        print(f"Downloading results for {dataset}, {model}")
        for metric in metric_keys:
            configs_and_metrics = load_runs_from_regex(regex, config_keys=config_keys, metric_keys=[metric])

            for (configs, metrics) in configs_and_metrics:
                # Row = position of the run's split within this dataset's split list.
                split = splits.index(configs['dataset_config.split'])
                assert model == configs['model_name']
                try:
                    llh_dict[dataset][model][metric][split] = metrics[metric][-1]
                except Exception as exc:
                    # Best effort: report the run that could not be stored and
                    # carry on. `Exception` (not a bare except) lets
                    # KeyboardInterrupt / SystemExit stop the cell.
                    print(dataset, split, model, repr(exc))
        # Writing the cache back to disk is currently disabled:
        # np.save(llh_dict_path, llh_dict)
    print()


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
llh for pol, sgd already exists
llh for pol, cg already exists
llh for pol, precondcg already exists
llh for pol, vi already exists

llh for elevators, sgd already exists
llh for elevators, cg already exists
llh for elevators, precondcg already exists
llh for elevators, vi already exists

llh for bike, sgd already exists
llh for bike, cg already exists
llh for bike, precondcg already exists
llh for bike, vi already exists

llh for protein, sgd already exists
llh for protein, cg already exists
llh for protein, precondcg already exists
llh for protein, vi already exists

llh for keggdirected, sgd already exists
llh for keggdirected, cg already exists
llh for keggdirected, precondcg already exists
llh for keggdirected, vi already exists

llh for 3droad, sgd already exists
llh for 3droad, cg already exists
llh for 3droad, precondcg already exists
llh for 3droad, vi already exists

llh for song, sgd alre

In [17]:
# Reload the cached RMSE / wall-clock results from disk.
rmse_dict_path = "./table_rmse.npy"
rmse_dict = np.load(rmse_dict_path, allow_pickle=True).item()

### cleanup cg vs precondcg
# Wherever 'precondcg' has a strictly lower normalised test RMSE than 'cg',
# copy its RMSE and its wall-clock time into the 'cg' arrays (in place).
for dataset in datasets:
    precond_results = rmse_dict[dataset]['precondcg']
    cg_results = rmse_dict[dataset]['cg']

    # The mask is computed once, before either array is overwritten.
    idx = precond_results['normalised_test_rmse'] < cg_results['normalised_test_rmse']
    for field in ('normalised_test_rmse', 'wall_clock_time'):
        cg_results[field][idx] = precond_results[field][idx]

In [18]:
# Reload the cached test log-likelihood results from disk.
llh_dict_path = "./table_llh.npy"
llh_dict = np.load(llh_dict_path, allow_pickle=True).item()

# Manual override of a single entry: the 'sgd' value at index 1 of the
# 'houseelectric' array.
# This used to be indexed with `dataset`, a variable left over from an earlier
# `for dataset in datasets` loop; its final value is the last entry of
# `datasets`, i.e. 'houseelectric'. Naming the dataset explicitly removes the
# dependency on which cell happened to run last.
# NOTE(review): the provenance of 1.1375 is not recorded in this notebook.
llh_dict['houseelectric']['sgd']['normalised_test_llh'][1] = 1.1375

In [56]:
# Per-dataset, per-model summary statistics for the results table:
#   means[dataset][model][key] -- np.mean of the metric array
#   errs[dataset][model][key]  -- np.std / sqrt(number of rows), i.e. standard error
#   bold[dataset][model][key]  -- whether the entry is typeset in bold
means = dict()
errs = dict()
bold = dict()

keys = ['rmse', 'rmse_low_noise', 'time', 'nll']
# Models that get a row in the table ('precondcg' gets none).
table_models = ['sgd', 'cg', 'vi']

for dataset in datasets:
    means[dataset], errs[dataset], bold[dataset] = dict(), dict(), dict()

    for model in table_models:
        means[dataset][model], errs[dataset][model], bold[dataset][model] = dict(), dict(), dict()

        # Listed in the same order as `keys`. The log-likelihood is negated so
        # that lower is better for every key.
        # NOTE(review): the download cell allocates 'wall_clock_time' with
        # shape (n_splits, 2), so the mean/std below would pool both columns
        # while the standard error divides by n_splits only -- confirm intended.
        metrics = [rmse_dict[dataset][model]['normalised_test_rmse'][:, 0],
                   rmse_dict[dataset][model]['normalised_test_rmse'][:, 1],
                   rmse_dict[dataset][model]['wall_clock_time'],
                   -llh_dict[dataset][model]['normalised_test_llh']]

        for key, metric in zip(keys, metrics):
            n_splits = metric.shape[0]
            means[dataset][model][key] = np.mean(metric)
            errs[dataset][model][key] = np.std(metric) / np.sqrt(n_splits)

    for key in keys:
        # Find the lowest mean and the standard error that goes with it.
        best_mean = np.inf
        best_err = np.inf
        for model in table_models:
            if means[dataset][model][key] < best_mean:
                best_mean = means[dataset][model][key]
                best_err = errs[dataset][model][key]

        # Bold every finite entry within one standard error of the best mean.
        # `<=` (not `<`) so the best model is still bold when its error is 0.
        for model in table_models:
            mean = means[dataset][model][key]
            bold[dataset][model][key] = np.isfinite(mean) and mean <= (best_mean + best_err)


In [185]:
# Value shown in the "$N$" header row for each dataset.
N = {
    'pol': 15000,
    'elevators': 16599,
    'bike': 17379,
    'kin40k': 40000,
    'protein': 45730,
    'keggdirected': 48827,
    '3droad': 434874,
    'song': 515345,
    'buzz': 583250,
    'houseelectric': 2049280
    }

# Per-dataset value; not used by the header built below.
D = {
    'pol': 26,
    'elevators': 18,
    'bike': 17,
    'kin40k': 8,
    'protein': 9,
    'keggdirected': 20,
    '3droad': 3,
    'song': 90,
    'buzz': 77,
    'houseelectric': 11
    }

# Column titles (some dataset names are shortened).
dataset_label = {
    'pol': 'pol',
    'elevators': 'elevators',
    'bike': 'bike',
    'kin40k': 'kin40k',
    'protein': 'protein',
    'keggdirected': 'keggdir',
    '3droad': '3droad',
    'song': 'song',
    'buzz': 'buzz',
    'houseelectric': 'houseelec'
}

# define table header and footer
pad = 1
### Line 1
header = "\\begin{table}[t]\n\\centering\n"
header += "\\scriptsize\n"
header += "\\setlength{\\tabcolsep}{2.5pt}\n"
header += "\\renewcommand{\\arraystretch}{1.1}\n"
# Two leading columns (row-group label, model name), then one per dataset.
header += "\\begin{tabular}{l c" + " c" * len(datasets)
header += "}\n\\toprule\n\\multicolumn{2}{c}{Dataset}"
header += "".join(f" & \\textsc{{{dataset_label[dataset]}}}" for dataset in datasets)
header += " \\\\\n"
### Line 2
header += "\\multicolumn{2}{c}{$N$}"
header += "".join(f" & {N[dataset]}" for dataset in datasets)
header += " \\\\\n"
header += "\\midrule\n"
###
# Every backslash is escaped: a bare "\e" is an invalid escape sequence that
# Python only tolerates with a warning. The resulting string is unchanged.
footer = "\\bottomrule\n\\end{tabular}\n\\end{table}\n"

In [186]:
# Row labels printed for each model.
model_labels = dict(sgd="SGD", cg="CG", vi="SVGP")

def get_line(metric, formatter=None):
    """Build the LaTeX rows for one metric, one row per model.

    Each row starts with the model label and then has one
    "mean\\,$\\pm$\\,err" cell per dataset, both numbers printed with two
    decimals. Cells flagged in `bold` are wrapped in \\textbf{...}. The 'vi'
    cells of 'rmse_low_noise' are replaced by "---".

    Args:
        metric: key into means/errs/bold, e.g. 'rmse' or 'time'.
        formatter: optional callable (mean, err) -> (mean, err) applied to
            the values before they are printed.

    Returns:
        The concatenated rows as a single string.
    """
    rows = []
    for model in ['sgd', 'cg', 'vi']:
        cells = [f" & {model_labels[model]}\n"]
        for dataset in datasets:
            mean = means[dataset][model][metric]
            err = errs[dataset][model][metric]
            if metric == 'rmse_low_noise' and model == 'vi':
                cells.append(" & ---")
                continue
            if formatter:
                mean, err = formatter(mean, err)
            entry = f"{mean:.2f}\\,$\\pm$\\,{err:.2f}"
            if bold[dataset][model][metric]:
                entry = f"\\textbf{{{entry}}}"
            cells.append(f" & {entry}")
        cells.append(" \\\\\n")
        rows.append("".join(cells))
    return "".join(rows)


In [187]:
def time_formatter(mean, err):
    """Convert a (mean, err) pair from seconds to hours.

    The earlier version had seconds/minutes/hours branches whose thresholds
    were both set to 0. That left the minutes branch unreachable and meant
    every non-negative value was already reported in hours. The conversion is
    now unconditional, which also covers negative inputs (previously returned
    unconverted).

    Args:
        mean: mean time in seconds.
        err: error of the mean in seconds.

    Returns:
        Tuple (mean, err) with both values divided by 3600.
    """
    seconds_per_hour = 3600
    return mean / seconds_per_hour, err / seconds_per_hour

def _multirow_label(label):
    """Return the rotated \\multirow cell that labels a three-row group."""
    # Every backslash is escaped: a bare "\m" is an invalid escape sequence
    # that Python only tolerates with a warning. The output is unchanged.
    return "\\multirow{3}{*}{\\rotatebox[origin=c]{90}{" + label + "}}\n"


# One group of rows per metric. Every group except the last one (NLL) is
# followed by a \midrule.
body_rmse = _multirow_label("RMSE")
body_rmse += get_line('rmse')
body_rmse += "\\midrule\n"

body_rmse_low_noise = _multirow_label("RMSE $\\dagger$")
body_rmse_low_noise += get_line('rmse_low_noise')
body_rmse_low_noise += "\\midrule\n"

body_time = _multirow_label("Hours")
body_time += get_line('time', formatter=time_formatter)
body_time += "\\midrule\n"

body_nll = _multirow_label("NLL")
body_nll += get_line('nll')

In [188]:
regression_table_filepath = "./table/regression.tex"

# open(..., 'w') does not create missing directories, so make sure the output
# folder exists before writing.
os.makedirs(os.path.dirname(regression_table_filepath), exist_ok=True)

# Write the table parts in order: header, the four row groups, footer.
with open(regression_table_filepath, 'w') as table:
    table.writelines([
        header,
        body_rmse,
        body_rmse_low_noise,
        body_time,
        body_nll,
        footer,
    ])