# Surrogate Modeling Batch Example
This example does not use the iterative procedure from the paper. It simply compares the performance of the XGBoost Surrogate Model against the Kriging Model.

## Let's set up our environment

### First let's increase the width of our workspace

In [2]:
# Stretch the notebook's `.container` element to 100% width by injecting a
# small CSS rule into the page.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

### Let's get the imports and functions we need from functions.py

In [3]:
# Imports
from functions import *

### Initialize the parameters and constants

In [5]:
# ABM evaluation budget
budget = 1000

# Calibration threshold
calibration_threshold = 1

# ABM parameters and their support: one row per parameter, holding the two
# ends of the range that is explored for it.
islands_exploration_range = np.array(
    [
        [0, 10],     # rho
        [0.8, 2],    # alpha
        [0.0, 1.0],  # phi
        [0.0, 1.0],  # pi
        [0.0, 1.0],  # eps
        [10, 100],   # N
        [0.0, 1.0],  # Lambda
    ]
)

# Number of ABM parameters (rows of the range table)
param_dims = len(islands_exploration_range)

## Evaluate entire Budget in Batch

In [None]:
# One dimension per row of the exploration-range table
n_dimensions = len(islands_exploration_range)

# Spend the whole budget at once: request `budget` points from
# `get_sobol_samples` over the exploration range ...
final_evaluated_set_X = get_sobol_samples(
    n_dimensions,
    budget,
    islands_exploration_range,
)
# ... and pass all of them to `evaluate_islands_on_set` in a single batch.
evaluated_set_y = evaluate_islands_on_set(final_evaluated_set_X)

  log_GDP = np.log(GDP)


# Batch comparison
This is a simple comparison to show how Kriging compares to XGBoost on the exact same ABM evaluations.

## Kriging Surrogate

In [None]:
# Build the Kriging surrogate from the evaluated batch and store it as the
# newest entry of `surrogate_models_kriging`.
surrogate_models_kriging.append(
    kriging(
        final_evaluated_set_X,
        evaluated_set_y,
        testfunction=island_abm,
        random_state=0,
    )
)

## XGBoost Surrogate

In [None]:
# `set_surrogate_as_gbt` supplies the surrogate model together with its
# parameter space (presumably the hyper-parameter search space - confirm in
# functions.py).
surrogate_model, surrogate_parameter_space = set_surrogate_as_gbt()

# Fit that surrogate on the same batch of evaluations used for Kriging.
surrogate_model_XGBoost = fit_surrogate_model(
    final_evaluated_set_X,
    evaluated_set_y,
    surrogate_model=surrogate_model,
    surrogate_parameter_space=surrogate_parameter_space,
)

# Out-of-Sample Evaluation
Let's compare the performance of the two approaches on 100 Monte Carlo batches of
100 out-of-sample draws each from the space of parameters, conditioned on the draws
not being the same parameters as those in the set used to train the surrogates above.

In [None]:
# Evaluate on out-of-sample test set
test_size = 100      # number of test points in each Monte Carlo batch
montecarlos = 100    # number of Monte Carlo batches

final_test_size = test_size * montecarlos

# mse_perf[m, i] holds the MSE of model m on batch i
# (row 0: XGBoost, row 1: Kriging - the order used by the plot labels).
mse_perf = np.zeros((2, montecarlos))

# The fitted surrogates, in the row order of `mse_perf`. The cells above store
# them as `surrogate_model_XGBoost` and as the last entry of
# `surrogate_models_kriging`; `surrogate_models` is not assigned there, so it
# is assembled here before being indexed.
surrogate_models = [surrogate_model_XGBoost, surrogate_models_kriging[-1]]

# Generate unique test set: keep only points that are not in the training set.
# Each row is compared as a whole tuple against every training row. The old
# `v not in final_evaluated_set_X[0]` test only looked at the first two
# training rows and matched on any single shared coordinate.
training_points = {tuple(row) for row in final_evaluated_set_X}

# Request more samples until enough unseen points survive the filter. Every
# draw goes through the same filter, so a top-up can never re-introduce
# training points (the old loop stacked unfiltered samples).
n_requested = final_test_size
while True:
    candidates = get_sobol_samples(n_dimensions,
                                   n_requested,
                                   islands_exploration_range)
    is_unseen = np.array([tuple(row) not in training_points for row in candidates],
                         dtype=bool)
    oos_set = candidates[is_unseen]
    if oos_set.shape[0] >= final_test_size:
        break
    # Too few survivors: enlarge the request by the shortfall and try again.
    n_requested += final_test_size - oos_set.shape[0]
oos_set = oos_set[:final_test_size]

y_test = evaluate_islands_on_set(oos_set)

# Evaluate Test Set Performance
for model_index, model in enumerate(surrogate_models):
    if model_index == 1:
        # Kriging: one predict call per test point
        y_hat_test = np.array([model.predict(point) for point in oos_set])
    else:
        # XGBoost: one predict call on the whole test set
        y_hat_test = model.predict(oos_set)

    # MSE performance, one value per disjoint batch of `test_size` points.
    # Batch i covers [i*test_size, (i+1)*test_size); starting the slice at `i`
    # would give overlapping, ever-growing windows.
    for i in range(montecarlos):
        batch = slice(i * test_size, (i + 1) * test_size)
        mse_perf[model_index, i] = mean_squared_error(y_test[batch],
                                                      y_hat_test[batch])

## Plot the densities for each of the methods

In [None]:
# Plot Performance Results
import matplotlib.pylab as plt
import seaborn as sns
%matplotlib inline

# Plot labels
experiment_labels = ["XGBoost","Kriging"]

fig,ax = plt.subplots(figsize=(12, 5), dpi=300)

xgb_label = "XGBoost: Mean" + str(mse_perf[0].mean()) + ", Variance" + str(mse_perf[0].var())
k_label = "Kriging: Mean" + str(mse_perf[1].mean()) + ", Variance" + str(mse_perf[1].var())

fig1 = sns.distplot(mse_perf[0], label = xgb_label, ax=ax)
fig2 = sns.distplot(mse_perf[1], label = k_label, ax=ax)
plt.title("Out-Of-Sample Prediction Performance")
plt.xlabel('Mean-Squared Error')
plt.yticks(fig1.get_yticks(), fig1.get_yticks() / 10000)
plt.ylabel('Density')
plt.legend()
fig.savefig("xgboost_kriging_ba_comparison_" + str(budget) + ".png");