## Notebook for Preparation of Evaluation Data for Generation of Figures of Aggregation Times

Import packages

In [None]:
import json
import os
import pickle

import numpy as np
import pandas as pd

pd.options.mode.chained_assignment = None

Initialize variables

In [None]:
# Display labels of the aggregation methods, keyed by their short identifiers
agg_names = {
    "lp": "Linear Pool",
    "vi": "Vincentization",
    "vi-a": "Vincentization (a)",
    "vi-w": "Vincentization (w)",
    "vi-aw": "Vincentization (a, w)",
}

# Short identifiers of all aggregation methods (same order as in agg_names)
agg_meths = list(agg_names)

# Subset of aggregation methods that require coefficient estimation
coeff_meths = ["vi-a", "vi-w", "vi-aw"]

# Display labels of the datasets, keyed by the dataset identifiers used in paths
set_names = {
    "gusts": "Wind",
    "scen_1": "Scenario 1",
    "scen_4": "Scenario 2",
    "protein": "Protein",
    "naval": "Naval",
    "power": "Power",
    "kin8nm": "Kin8nm",
    "wine": "Wine",
    "concrete": "Concrete",
    "energy": "Energy",
    "boston": "Boston",
    "yacht": "Yacht",
}

Get configuration (requires setting the Git path first!)

In [None]:
# Set GIT-path: root directory of the local repository clone.
# The placeholder below must be replaced before running the notebook; the
# configuration is read from "<git_path>/src/config_eval.json" and the
# prepared data is written to "<git_path>/src/".
git_path = "path_to_repo"

def _get_config_info():
    """Read the evaluation configuration and derive paths and parameters.

    The configuration is loaded from ``<git_path>/src/config_eval.json``,
    where ``git_path`` is the module-level repository path.

    Returns
    -------
    tuple
        ``(ens_method, ens_method_ls, plot_path, data_path, data_ens_path,
        data_agg_path, dataset_ls, n_sim, n_ens, n_ens_vec, nn_vec)`` where

        - ``ens_method`` is the configured ensemble method,
        - ``ens_method_ls`` are the available ensemble methods,
        - ``plot_path`` is the figure directory for ``ens_method``,
        - ``data_path`` is the results directory; it contains the literal
          component ``"dataset"`` and the ``ens_method`` component,
        - ``data_ens_path`` / ``data_agg_path`` are the ensemble and
          aggregation sub-directories of ``data_path``,
        - ``dataset_ls`` is the configured ``DATASET`` entry,
        - ``n_sim`` is the number of simulations,
        - ``n_ens`` is the ensemble size,
        - ``n_ens_vec`` is the array ``2, 4, ...`` of ensemble sizes that
          stops before ``n_ens + 2``,
        - ``nn_vec`` are the network variants.
    """
    with open(f"{git_path}/src/config_eval.json", "rb") as config_file:
        config = json.load(config_file)

    # Ensemble method in use and all available ensemble methods
    ens_method = config["ENS_METHOD"]
    ens_method_ls = config["_available_ENS_METHOD"]

    paths = config["PATHS"]

    # Directory for figures
    plot_path = os.path.join(paths["PLOTS_DIR"], ens_method)

    # Results directory; "dataset" is a literal path component here
    data_path = os.path.join(
        paths["DATA_DIR"], paths["RESULTS_DIR"], "dataset", ens_method
    )

    # Sub-directories for network ensemble data and aggregated network data
    data_ens_path = os.path.join(data_path, paths["ENSEMBLE_F"])
    data_agg_path = os.path.join(data_path, paths["AGG_F"])

    # Datasets considered
    dataset_ls = config["DATASET"]

    params = config["PARAMS"]

    # Number of simulations and ensemble size
    n_sim = params["N_SIM"]
    n_ens = params["N_ENS"]

    # Ensemble sizes to evaluate, in steps of two
    step_size = 2
    n_ens_vec = np.arange(step_size, n_ens + step_size, step_size)

    # Network variants
    nn_vec = params["NN_VEC"]

    return (
        ens_method,
        ens_method_ls,
        plot_path,
        data_path,
        data_ens_path,
        data_agg_path,
        dataset_ls,
        n_sim,
        n_ens,
        n_ens_vec,
        nn_vec,
    )

In [None]:
# Unpack the configuration tuple; the order must match the return statement of
# _get_config_info(). The two "_" slots discard values that are not used here.
(
    _,  # configured ensemble method (the loop below sets ens_method per case)
    ens_method_ls,
    plot_path,
    data_path,
    data_ens_path,
    data_agg_path,
    _,  # configured dataset list (the cases are given by in_depth_cases instead)
    n_sim,
    n_ens,
    n_ens_vec,
    nn_vec,
) = _get_config_info()

In [None]:
# (dataset identifier, ensembling strategy) pairs for which data is prepared
in_depth_cases = [("kin8nm", "bayesian"), ("boston", "bagging")]

Create data for figures

In [5]:
# Collect one single-row data frame per timing file and concatenate them once
# at the end. Growing df_plot with pd.concat inside the loop would copy all
# previously collected rows on every iteration.
row_frames = []

# For-Loop over data sets, ensembling strategies
for dataset, ens_method in in_depth_cases:
    print(f"({dataset}, {ens_method})")

    # Replace ensemble method (required as data_path depends on last ens. method called)
    # NOTE(review): plain substring replacement; assumes no ensemble method name
    # occurs elsewhere in the path - confirm against the configured directories.
    for temp_ens_method in ens_method_ls:
        data_path = data_path.replace(temp_ens_method, ens_method)

    # Replace the literal "dataset" path component by the current dataset
    case_data_path = data_path.replace("dataset", dataset)

    # Directory holding the timing files of this case
    time_keeping_path = os.path.join(case_data_path, "time_keeping")

    # For-Loop over network variants
    for temp_nn in nn_vec:
        # For-Loop over number of ensemble members
        for i_ens in n_ens_vec:
            # For-Loop over simulation runs
            for i_sim in range(n_sim):
                # Load data
                # NOTE: pickle executes arbitrary code on load; only read
                # files produced by this project's own pipeline.
                filename = f"time_keeping_{temp_nn}_sim_{i_sim}_ens_{i_ens}.pkl"
                with open(os.path.join(time_keeping_path, filename), "rb") as f:
                    agg_times = pickle.load(f)

                # New row: case identifiers followed by the loaded timings
                # (presumably one entry per aggregation method - TODO confirm)
                new_row = {
                    "dataset": set_names[dataset],
                    "ens_method": ens_method,
                    "nn": temp_nn.upper(),
                    "n_ens": i_ens,
                    "n_sim": i_sim,
                    **agg_times,
                }
                row_frames.append(pd.DataFrame(new_row, index=[0]))

# Build the final data frame in one pass; pd.concat raises on an empty list,
# so fall back to an empty frame when no timing file was read.
df_plot = (
    pd.concat(row_frames, ignore_index=True) if row_frames else pd.DataFrame()
)

(kin8nm, bayesian)
(boston, bagging)


Save data

In [None]:
# Save data
with open(f"{git_path}/src/paper_figures_data_aggregation_time.pkl", "wb") as f:
    pickle.dump(df_plot, f)