In [1]:
import openml
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fetch OpenML dataset 487 and separate the features from its default target column.
dataset = openml.datasets.get_dataset(487)
X, y, _, attribute_names = dataset.get_data(target=dataset.default_target_attribute)

# Split BEFORE scaling: fitting the scaler on the full data set would let the
# test rows influence the mean/variance used to preprocess the training rows
# (train/test leakage). The fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to the held-out split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Keep `X` bound to a scaled array, as before, for any cell that still reads it.
X = sc.transform(X)



In [2]:
from sklearn.ensemble import RandomForestRegressor
from uncertainty_analysis import bootstrap

# Width of every per-test-sample result row; passed as the second argument to
# bootstrap.bayesian_bootstrap and as n_rubin_samples to
# ProperBayesianForest.get_prediction_distribution.
n_rubin_samples = 500
# Passed as the second argument to pbf.get_k and as n_galvani_samples to
# pbf.ProperBayesianForest.
n_pbf_btstrp_smpls = 30

# Bayesian Forest

In [4]:
# One entry per fitted forest; each entry is an array of shape
# (len(X_test), n_rubin_samples) holding the bootstrap output per test sample.
paper_bf_results = []
for _ in range(10):
    forest = RandomForestRegressor(criterion='absolute_error', max_depth=None, max_features=13,
                                   min_samples_split=2, min_samples_leaf=4, n_estimators=300)
    forest.fit(X_train, y_train)

    # Row j holds the predictions of tree j for the whole test set.
    tree_preds = np.empty((forest.n_estimators, len(X_test)))
    for tree_row, estimator in zip(tree_preds, forest.estimators_):
        tree_row[:] = estimator.predict(X_test)

    # Transposing gives one row per test sample (all trees' predictions for it);
    # each such row is fed to the Bayesian bootstrap.
    sample_draws = np.empty((len(X_test), n_rubin_samples))
    for sample_idx, per_tree_values in enumerate(tree_preds.T):
        sample_draws[sample_idx] = bootstrap.bayesian_bootstrap(per_tree_values, n_rubin_samples)

    paper_bf_results.append(sample_draws)

In [5]:
from uncertainty_analysis import project_helper_functions as phf

# Select the run that phf.bf_get_best_model_idx picks given y_test, then write it out.
best_bf_idx = phf.bf_get_best_model_idx(paper_bf_results, y_test)
best_model_results = paper_bf_results[best_bf_idx]
phf.write_ensemble_model_results(best_model_results, "paper_bf_results.txt")

# Proper Bayesian Forest, w=0

In [6]:
from uncertainty_analysis import proper_bayesian_forest as pbf

# k for the "w=0" section: pbf.get_k is called with 0 and the bootstrap sample
# count. Presumably the first argument is the weight w — TODO confirm against
# proper_bayesian_forest.get_k.
k_w0 = pbf.get_k(0, n_pbf_btstrp_smpls)

In [7]:
def create_priors(X):
    """Build one ``pbf.UniformPrior`` per column of ``X``.

    Args:
        X: Two-dimensional array indexed as ``X[:, column]``.

    Returns:
        A 1-D NumPy array of length ``X.shape[1]`` whose element ``c`` is
        ``pbf.UniformPrior(X[:, c])``.
    """
    priors = np.empty(X.shape[1], dtype=pbf.UniformPrior)
    for col in range(priors.size):
        priors[col] = pbf.UniformPrior(X[:, col])
    return priors

In [13]:
def proper_bf(k):
    """Fit ten ProperBayesianForest models and collect their test-set distributions.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test``,
    ``n_rubin_samples`` and ``n_pbf_btstrp_smpls``.

    Args:
        k: Value repeated once per column of ``X_train`` (stored as int32) and
            passed to the forest as ``k_values``.

    Returns:
        A list with one array per fitted model, each of shape
        ``(len(X_test), n_rubin_samples)``; row ``j`` is the output of
        ``get_prediction_distribution`` for test sample ``j``.
    """
    n_features = X_train.shape[1]
    runs = []

    for _ in range(10):
        # Priors and k_values are rebuilt for every model, never shared between runs.
        forest = pbf.ProperBayesianForest(
            X_train, y_train,
            create_priors(X_train),
            k_values=np.array([k] * n_features, dtype=np.int32),
            n_galvani_samples=n_pbf_btstrp_smpls,
            criterion='absolute_error', max_depth=None, max_features=13,
            min_samples_split=2, min_samples_leaf=4, n_estimators=300,
        )
        forest.fit()

        distributions = np.empty((len(X_test), n_rubin_samples))
        # The model is queried one sample at a time, each reshaped to a (1, n_features) row.
        for out_row, sample in zip(distributions, X_test):
            out_row[:] = forest.get_prediction_distribution(
                sample.reshape(1, -1), n_rubin_samples=n_rubin_samples
            )

        runs.append(distributions)

    return runs

In [14]:
# Run the proper Bayesian forest with k_w0, keep the run chosen by
# phf.bf_get_best_model_idx, and write that run to disk.
pbf_w0_results = proper_bf(k_w0)
best_w0_idx = phf.bf_get_best_model_idx(pbf_w0_results, y_test)
best_model_results = pbf_w0_results[best_w0_idx]
phf.write_ensemble_model_results(best_model_results, "paper_pbf_w0_full_set_results.txt")

# Proper Bayesian Forest, w=0.1

In [15]:
# k for the "w=0.1" section: pbf.get_k is called with 0.1 and the bootstrap
# sample count. Presumably the first argument is the weight w — TODO confirm
# against proper_bayesian_forest.get_k.
k_w1 = pbf.get_k(0.1, n_pbf_btstrp_smpls)

In [16]:
# Run the proper Bayesian forest with k_w1, keep the run chosen by
# phf.bf_get_best_model_idx, and write that run to disk.
pbf_w1_results = proper_bf(k_w1)
best_w1_idx = phf.bf_get_best_model_idx(pbf_w1_results, y_test)
best_model_results = pbf_w1_results[best_w1_idx]
phf.write_ensemble_model_results(best_model_results, "paper_pbf_w01_full_set_results.txt")