In [1]:
# Stretch the notebook container to the full browser width.
# `IPython.display` is the public home of `display`/`HTML`; importing them from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Load the autoreload extension and use mode 2, so edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [3]:
# %%writefile example.py

# Imports
# NOTE(review): the wildcard import is presumably what brings `np`, `tic`/`toc`,
# `run_online_surrogate`, `kriging`, `island_abm`, `get_sobol_samples`,
# `evaluate_islands_on_set` and `mean_squared_error` into scope for the cells
# below, since nothing else visible here imports them - confirm against functions.py.
from functions import *

# 0. Initialize
# Set ABM Evaluation Budget (first argument passed to run_online_surrogate)
budget = 100

# Set Calibration Threshold (last argument passed to run_online_surrogate)
calibration_threshold = 1

# Run Comparison
# Names of the two compared surrogates.
# NOTE(review): not referenced by any other cell visible in this notebook.
experiment_labels = ["XGBoost","Kriging"]

### Initialize the parameters and constants

In [4]:
# Support explored for each ABM parameter: one (lower, upper) row per parameter
islands_exploration_range = np.array([
    (0, 10),       # rho
    (0.8, 2),      # alpha
    (0.0, 1.0),    # phi
    (0.0, 1.0),    # pi
    (0.0, 1.0),    # eps
    (10, 100),     # N
    (0.0, 1.0),    # Lambda
])

# Number of ABM parameters (rows of the bounds array)
n_dimensions = len(islands_exploration_range)

# Iterative comparison
This is a simple comparison showing how Kriging compares to XGBoost on the exact same ABM evaluations. Because the training examples are selected iteratively by XGBoost, we want to show how Kriging performs when trained on those exact same samples.

## XGBoost Surrogate

In [None]:
_KRIGING = 0
surrogate_models = [None,None]
tic()
surrogate_models[_KRIGING], evaluated_set_X, evaluated_set_y = run_online_surrogate(budget, n_dimensions, islands_exploration_range, calibration_threshold)
print "Time: ", toc()

## Kriging Surrogate

In [None]:
_KRIGING = 1
tic()
surrogate_models[_KRIGING] = kriging(evaluated_set_X, evaluated_set_y, testfunction=island_abm, random_state=0)
surrogate_models[_KRIGING].train()
print "Time: ", toc()

# Out-of-Sample Evaluation
Let's compare the performance of the two approaches on 100 random draws from the
space of parameters, conditioned on these not being the same parameters as the set
used to train the surrogates above.

In [None]:
# Evaluate on out-of-sample test set
test_size = 100      # points per Monte Carlo batch
montecarlos = 100    # number of batches

final_test_size = (test_size*montecarlos)

# One row per surrogate, one column per Monte Carlo batch
mse_perf = np.zeros((2,montecarlos))


def _drop_training_rows(candidates, training_X):
    """Return the rows of `candidates` that are not rows of `training_X`.

    `row in array` on a NumPy array evaluates `(array == row).any()`, which is
    true as soon as a single coordinate matches. Membership is therefore tested
    on whole rows here.

    candidates : 2-D array, one candidate point per row.
    training_X : array-like of training points with the same number of columns.
    """
    training_X = np.asarray(training_X)
    if training_X.size == 0:
        return candidates
    training_X = training_X.reshape(-1, candidates.shape[1])
    is_new = np.array([not (training_X == row).all(axis=1).any()
                       for row in candidates], dtype=bool)
    return candidates[is_new]


# Generate unique test set.
# Every batch is filtered against the training points. If too few points
# survive, a longer sample is requested and filtered again, instead of
# stacking unfiltered batches, which could bring training points back in.
# NOTE(review): assumes get_sobol_samples(n_dims, n, bounds) returns an
# (n, n_dims) array - confirm against functions.py.
n_requested = final_test_size
max_rounds = 10
for attempt in range(max_rounds):
    oos_set = _drop_training_rows(
        get_sobol_samples(n_dimensions, n_requested, islands_exploration_range),
        evaluated_set_X)
    if oos_set.shape[0] >= final_test_size:
        break
    n_requested += final_test_size
else:
    raise RuntimeError("Could not collect %d out-of-sample points in %d rounds"
                       % (final_test_size, max_rounds))
oos_set = oos_set[:final_test_size]

# ABM responses on the held-out points
y_test = evaluate_islands_on_set(oos_set)

In [None]:
# Evaluate Test Set Performance
for _KRIGING in [0,1]:
    if _KRIGING:
        tic()
        y_hat_test = np.array([surrogate_models[_KRIGING].predict(v) for v in oos_set])
        print "Time: ", toc()
    else:
        tic()
        y_hat_test = surrogate_models[_KRIGING].predict(oos_set)
        print "Time: ", toc()

    # MSE performance
    for i in range(montecarlos):
        mse_perf[_KRIGING,i] = mean_squared_error(y_test[i:(i+1)*test_size],
                                                  y_hat_test[i:(i+1)*test_size])

## Plot the densities for each of the methods

In [None]:
# Plot Performance Results
import matplotlib.pylab as plt
import seaborn as sns
%matplotlib inline

fig,ax = plt.subplots(figsize=(12, 5), dpi=300)

xgb_label = "XGBoost: Mean" + str(mse_perf[0].mean()) + ", Variance" + str(mse_perf[0].var())
k_label = "Kriging: Mean" + str(mse_perf[1].mean()) + ", Variance" + str(mse_perf[1].var())

fig1 = sns.distplot(mse_perf[0], label = xgb_label, ax=ax)
fig2 = sns.distplot(mse_perf[1], label = k_label, ax=ax)
plt.title("Out-Of-Sample Prediction Performance")
plt.xlabel('Mean-Squared Error')
plt.yticks(fig1.get_yticks(), fig1.get_yticks() / 10000)
plt.ylabel('Density')
plt.legend()
fig.savefig("xgboost_kriging_it_comparison_" + str(budget) + ".png");