In [73]:
import openml
import pandas as pd

# Fetch OpenML dataset 487 and separate it into the feature table X and the
# dataset's default target y. The third value returned by get_data is
# discarded; attribute_names is kept alongside X and y.
dataset = openml.datasets.get_dataset(487)
X, y, _, attribute_names = dataset.get_data(target=dataset.default_target_attribute)

from sklearn.model_selection import train_test_split
# Hold out 25% of the rows as the test set, with a fixed seed so the split is
# repeatable. y_test is the ground truth used by the metric cells further down.
# NOTE(review): presumably the prediction files loaded in the next cell were
# produced on this exact split (same test_size and random_state) -- confirm,
# otherwise their rows will not line up with y_test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)



In [74]:
# Prediction files for the sample-based models. Each file is read without a
# header row and its last column is dropped.
# NOTE(review): the dropped column is presumably an empty artefact of a
# trailing delimiter in the files -- confirm against the writer.
enn, erf, bf, pbf_0, pbf_01 = (
    pd.read_csv(results_path, header=None).iloc[:, :-1]
    for results_path in (
        'paper_ensemble_nn_results.txt',       # -> enn
        'paper_ensemble_rf_results.txt',       # -> erf
        'paper_bf_results.txt',                # -> bf
        'paper_pbf_w0_full_set_results.txt',   # -> pbf_0
        'paper_pbf_w01_full_set_results.txt',  # -> pbf_01
    )
)

# The BNN file is kept whole: later cells read its column 0 as the prediction
# and its column 1 as the spread.
bnn = pd.read_csv('paper_bnn_results.txt', header=None)

In [75]:
# One prediction per row for every model. The sample-based models use the
# row-wise average over their columns; the BNN uses column 0 of its file
# unchanged (presumably already a predictive mean -- confirm).
means = {
    "enn": enn.mean(axis=1),
    "bnn": bnn.iloc[:, 0],
    "erf": erf.mean(axis=1),
    "bf": bf.mean(axis=1),
    "pbf_0": pbf_0.mean(axis=1),
    "pbf_01": pbf_01.mean(axis=1),
}

In [76]:
# Per-row spread for every model. The sample-based models use the row-wise
# standard deviation over their columns (pandas default ddof); the BNN uses
# column 1 of its file unchanged (presumably a reported std -- confirm).
stds = {
    "enn": enn.std(axis=1),
    "bnn": bnn.iloc[:, 1],
    "erf": erf.std(axis=1),
    "bf": bf.std(axis=1),
    "pbf_0": pbf_0.std(axis=1),
    "pbf_01": pbf_01.std(axis=1),
}

In [77]:
# Average per-row spread for every model: the row-wise standard deviations
# (column 1 of the file for the BNN) reduced to a single mean value.
std_means = {
    "enn": enn.std(axis=1).mean(),
    "bnn": bnn.iloc[:, 1].mean(),
    "erf": erf.std(axis=1).mean(),
    "bf": bf.std(axis=1).mean(),
    "pbf_0": pbf_0.std(axis=1).mean(),
    "pbf_01": pbf_01.std(axis=1).mean(),
}

In [78]:
# Total per-row spread for every model: the row-wise standard deviations
# (column 1 of the file for the BNN) summed over all rows.
std_totals = {
    "enn": enn.std(axis=1).sum(),
    "bnn": bnn.iloc[:, 1].sum(),
    "erf": erf.std(axis=1).sum(),
    "bf": bf.std(axis=1).sum(),
    "pbf_0": pbf_0.std(axis=1).sum(),
    "pbf_01": pbf_01.std(axis=1).sum(),
}

In [79]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Mean absolute error of each model's per-row prediction against y_test.
# `means` holds exactly the six models, in the order enn, bnn, erf, bf,
# pbf_0, pbf_01, so iterating it yields the same keys in the same order.
mae = {
    model: mean_absolute_error(y_test, prediction)
    for model, prediction in means.items()
}

# Mean squared error of each model's per-row prediction against y_test,
# keyed by the same model names (and in the same order) as `means`.
mse = {
    model: mean_squared_error(y_test, prediction)
    for model, prediction in means.items()
}

In [80]:
import uncertainty_analysis.uncertainty_metrics as metrics

# 95% interval values per model, computed on the float-cast result tables.
# The sample-based models go through get_95_ci_intervals; the BNN table goes
# through the separate get_95_cb_intervals helper.
cib_interval_values = {
    "enn": metrics.get_95_ci_intervals(enn.astype(float)),
    "bnn": metrics.get_95_cb_intervals(bnn.astype(float)),
    "erf": metrics.get_95_ci_intervals(erf.astype(float)),
    "bf": metrics.get_95_ci_intervals(bf.astype(float)),
    "pbf_0": metrics.get_95_ci_intervals(pbf_0.astype(float)),
    "pbf_01": metrics.get_95_ci_intervals(pbf_01.astype(float)),
}

In [82]:
# TVS score of each model's interval values against y_test. The BNN intervals
# are scored with the dedicated tvs_score_bnn helper; every other model uses
# tvs_score.
tvs_scores = {
    model: (
        metrics.tvs_score_bnn(intervals, y_test)
        if model == "bnn"
        else metrics.tvs_score(intervals, y_test)
    )
    for model, intervals in cib_interval_values.items()
}

In [83]:
# Quality score of each model's interval values against y_test. The BNN
# intervals are scored with quality_score_bnn; every other model uses
# quality_score.
quality_scores = {
    model: (
        metrics.quality_score_bnn(intervals, y_test)
        if model == "bnn"
        else metrics.quality_score(intervals, y_test)
    )
    for model, intervals in cib_interval_values.items()
}

In [91]:
# Quality scores rounded to six decimal places, printed as a plain dict.
rounded_dict = dict(
    (model, round(score, 6)) for model, score in quality_scores.items()
)
print(rounded_dict)

{'enn': 69.607164, 'bnn': 480.501758, 'erf': 8.497975, 'bf': 8.348049, 'pbf_0': 7.064103, 'pbf_01': 8.285746}


In [92]:
# Pairwise CDS score matrix: one row and one column per model, labelled with
# the keys of cib_interval_values. Cell [var_x, var_y] receives the value of
# metrics.cds_score for that pair of models (interval values plus per-row
# means of each model).
cds_scores = pd.DataFrame(index=cib_interval_values.keys(), columns=cib_interval_values.keys())

# Visit every ordered pair of models (including each model with itself) and
# store the score for that pair.
for var_x, val_x in cib_interval_values.items():
    for var_y, val_y in cib_interval_values.items():
        # Whenever one side of the pair is the BNN, the BNN's intervals and
        # means are passed first and bnn=True is set; the (bnn, bnn) pair is
        # handled by the first branch.
        # NOTE(review): presumably cds_score expects the BNN-format intervals
        # in the first position when bnn=True -- confirm in uncertainty_metrics.
        if var_x == 'bnn':
            cds_scores.loc[var_x, var_y] = metrics.cds_score(val_x, means[var_x], val_y, means[var_y], bnn=True)
        elif var_y == 'bnn':
            cds_scores.loc[var_x, var_y] = metrics.cds_score(val_y, means[var_y], val_x, means[var_x], bnn=True)
        else:
            # NOTE(review): the arguments are passed in (y, x) order while the
            # result is written to [x, y]. This is only harmless if cds_score
            # is symmetric in its two models (the displayed matrix is
            # symmetric) -- confirm.
            cds_scores.loc[var_x, var_y] = metrics.cds_score(val_y, means[var_y], val_x, means[var_x])

# Display the filled matrix as the cell output.
cds_scores

Unnamed: 0,enn,bnn,erf,bf,pbf_0,pbf_01
enn,1.0,0.8125,0.1875,0.1875,0.1875,0.1875
bnn,0.8125,1.0,0.6875,0.625,0.625,0.5
erf,0.1875,0.6875,1.0,0.9375,0.4375,0.375
bf,0.1875,0.625,0.9375,1.0,0.3125,0.5625
pbf_0,0.1875,0.625,0.4375,0.3125,1.0,0.0625
pbf_01,0.1875,0.5,0.375,0.5625,0.0625,1.0
