## Setup

In [None]:
from specific import *

In [None]:
# Obtain a client from the `get_client` helper (not defined in this notebook;
# presumably provided by the star import in the setup cell).
client = get_client()
# Bare trailing expression: the notebook shows the client's repr as cell output.
client

### Get unshifted data with standard interpolation

In [None]:
# XXX: Uncomment the next line to call `data_memory.clear()` before loading
# (presumably this discards cached results so `get_data` recomputes — confirm).
# data_memory.clear()

# Load the dataset using `get_data`'s default arguments (the "standard
# interpolation" case named in the section heading) and unpack its six
# return values.
(
    endog_data,
    exog_data,
    master_mask,
    filled_datasets,
    masked_datasets,
    land_mask,
) = get_data()

In [None]:
# Plot the master mask, cast to float, to show which samples are missing.
# Assigning to `_` keeps the return value out of the cell output.
_ = cube_plotting(
    master_mask.astype("float"), title="Missing Samples with Temporal Interpolation"
)

In [None]:
# Split the predictors (`exog_data`) and the target (`endog_data`) into train
# and test subsets: 30% is held out for testing, with shuffling enabled and a
# fixed `random_state=1`.
# Returns the tuple (X_train, X_test, y_train, y_test).
# NOTE(review): `data_split_cache` is defined elsewhere; presumably it caches
# the returned split between runs. Comments are kept outside the function in
# case the cache is keyed on the function's source — confirm.
@data_split_cache
def get_split_data():
    X_train, X_test, y_train, y_test = train_test_split(
        exog_data, endog_data, random_state=1, shuffle=True, test_size=0.3
    )
    return X_train, X_test, y_train, y_test


# Materialise the split as notebook-level variables for the cells below.
X_train, X_test, y_train, y_test = get_split_data()

In [None]:
# Obtain the model for the interpolated training split from `get_model`
# (defined elsewhere; whether it fits or loads a stored model is not visible here).
rf = get_model(X_train, y_train)

In [None]:
# Predict on both splits with the interpolated-data model, using
# `get_ncpus()` jobs on joblib's threading backend.
n_cpus = get_ncpus()
rf.n_jobs = n_cpus
with parallel_backend("threading", n_jobs=n_cpus):
    y_train_pred = rf.predict(X_train)
    y_pred = rf.predict(X_test)

# Score each split, then report test metrics ahead of train metrics.
test_r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
train_r2 = r2_score(y_train, y_train_pred)
train_mse = mean_squared_error(y_train, y_train_pred)

print("Test R2:", test_r2)
print("Test MSE:", test_mse)
print("Train R2:", train_r2)
print("Train MSE:", train_mse)

### Forego temporal interpolation

In [None]:
# XXX: Uncomment the next line to call `data_memory.clear()` before loading
# (presumably this discards cached results so `get_data` recomputes — confirm).
# data_memory.clear()

# Load the dataset again with `n_months=0` (per the section heading, this is
# the case without temporal interpolation). The `u_` prefix marks the
# uninterpolated counterparts of the variables loaded earlier.
(
    u_endog_data,
    u_exog_data,
    u_master_mask,
    u_filled_datasets,
    u_masked_datasets,
    u_land_mask,
) = get_data(n_months=0)

In [None]:
# Plot the uninterpolated master mask, cast to float, to show which samples
# are missing. Assigning to `_` keeps the return value out of the cell output.
_ = cube_plotting(
    u_master_mask.astype("float"),
    title="Missing Samples without Temporal Interpolation",
)

### Train a model without the northern samples

In [None]:
# Split the uninterpolated predictors (`u_exog_data`) and target
# (`u_endog_data`) into train and test subsets: 30% is held out for testing,
# with shuffling enabled and a fixed `random_state=1`.
# Returns the tuple (u_X_train, u_X_test, u_y_train, u_y_test).
# NOTE(review): `u_data_split_cache` is defined elsewhere; presumably it caches
# the returned split between runs. Comments are kept outside the function in
# case the cache is keyed on the function's source — confirm.
@u_data_split_cache
def u_get_split_data():
    u_X_train, u_X_test, u_y_train, u_y_test = train_test_split(
        u_exog_data, u_endog_data, random_state=1, shuffle=True, test_size=0.3
    )
    return u_X_train, u_X_test, u_y_train, u_y_test


# Materialise the split as notebook-level variables for the cells below.
u_X_train, u_X_test, u_y_train, u_y_test = u_get_split_data()

In [None]:
# Obtain the model for the uninterpolated training split from
# `uninterp_get_model` (defined elsewhere; whether it fits or loads a stored
# model is not visible here).
u_rf = uninterp_get_model(u_X_train, u_y_train)

In [None]:
# Predict on both uninterpolated splits with `u_rf`, using `get_ncpus()` jobs
# on joblib's threading backend.
u_n_cpus = get_ncpus()
u_rf.n_jobs = u_n_cpus
with parallel_backend("threading", n_jobs=u_n_cpus):
    u_y_train_pred = u_rf.predict(u_X_train)
    u_y_pred = u_rf.predict(u_X_test)

# Score each split, then report test metrics ahead of train metrics.
u_test_r2 = r2_score(u_y_test, u_y_pred)
u_test_mse = mean_squared_error(u_y_test, u_y_pred)
u_train_r2 = r2_score(u_y_train, u_y_train_pred)
u_train_mse = mean_squared_error(u_y_train, u_y_train_pred)

print("Test R2:", u_test_r2)
print("Test MSE:", u_test_mse)
print("Train R2:", u_train_r2)
print("Train MSE:", u_train_mse)

## Robustness test

### Select samples that were made available by interpolation (i.e. that were not available in the uninterpolated case) and test model performance for these samples

In [None]:
# Start from a copy of the interpolated master mask so the original is not
# modified (True entries are treated as masked/unavailable below).
new_master_mask = master_mask.copy()

# Deselect previously seen elements, only select those that arose due to interpolation:
# every position that was already available without interpolation
# (`u_master_mask` is False there) is masked out.
new_master_mask[~u_master_mask] = True
# Flat indices of the positions that remain unmasked.
new_valid_indices = np.where(~new_master_mask.ravel())[0]

# Build the target array from `endog_data` and `master_mask`
# (presumably `get_masked_array` places the flat samples at the unmasked
# positions of the mask — confirm), then pick out the newly available samples.
endog = get_masked_array(endog_data, master_mask)
new_endog_data = endog.ravel()[new_valid_indices]
# Fully masked, zero-filled array of the same shape; only the selected
# positions are filled in below.
new_endog = np.ma.MaskedArray(np.zeros_like(endog), mask=True)
# NOTE(review): this relies on `ravel()` returning a view that shares data and
# mask with `new_endog`, so the assignment is visible through `new_endog` —
# confirm with the NumPy version in use.
new_endog.ravel()[new_valid_indices] = new_endog_data

In [None]:
# For every predictor column, build its array via `get_masked_array` with the
# interpolated master mask, flatten it, and keep only the entries at
# `new_valid_indices`; the per-column results are collected into a DataFrame.
new_exog_dict = {
    column: get_masked_array(exog_data[column].to_numpy(), master_mask).ravel()[
        new_valid_indices
    ]
    for column in exog_data.columns
}
new_exog_data = pd.DataFrame(new_exog_dict)

In [None]:
# Plot the full target array with `log=True`; `_ =` keeps the return value
# out of the cell output.
_ = cube_plotting(endog, log=True)

In [None]:
# Plot the new mask (cast to float); its unmasked positions are the samples
# that only became available through interpolation.
_ = cube_plotting(new_master_mask.astype("float"))

In [None]:
# Plot the target restricted to the newly available samples, with `log=True`.
_ = cube_plotting(new_endog, log=True)

In [None]:
# Display the predictor column names (bare expression shown as cell output).
exog_data.columns

In [None]:
# Plot three of the predictors for the newly available samples, one figure
# per variable, each titled with the variable's column name.
variables_to_plot = ("SIF 3NN", "AGB Tree", "Dry Day Period")
for variable in variables_to_plot:
    masked_values = get_masked_array(new_exog_data[variable], new_master_mask)
    cube_plotting(masked_values, title=variable, log=True, min_edge=1)

### Predict BA for these regions

In [None]:
# Predict the target for the newly available samples with the model obtained
# from the uninterpolated data, using `get_ncpus()` jobs on the threading backend.
new_n_cpus = get_ncpus()
u_rf.n_jobs = new_n_cpus
with parallel_backend("threading", n_jobs=new_n_cpus):
    new_pred = u_rf.predict(new_exog_data)

In [None]:
# Colour-scale boundaries, one per decade from 1e-5 to 1e-1.
boundaries = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

# Nested keyword-argument dictionaries, named separately for readability.
_colorbar_kwargs = {"label": "Burned Area Fraction", "format": "%0.0e"}
_coastline_kwargs = {"linewidth": 0.3}

# Keyword arguments shared by the `cube_plotting` calls in the following cells
# (passed as `**plot_params`).
plot_params = {
    "extend": "min",
    "cmap": "YlOrRd",
    "boundaries": boundaries,
    "colorbar_kwargs": _colorbar_kwargs,
    "coastline_kwargs": _coastline_kwargs,
    "log": True,
}

In [None]:
# Plot the observed target for the newly available samples (titled "GFED4")
# using the shared plotting parameters.
_ = cube_plotting(new_endog, title="GFED4", **plot_params)

In [None]:
# Plot the predictions for the same samples, built via `get_masked_array`
# with `new_master_mask`, using the same plotting parameters as the
# "GFED4" plot so the two figures are directly comparable.
_ = cube_plotting(
    get_masked_array(new_pred, new_master_mask), title="U Prediction", **plot_params
)

In [None]:
# Score the predictions against the observed target on the newly available
# samples only, then report both metrics.
new_r2 = r2_score(new_endog_data, new_pred)
new_mse = mean_squared_error(new_endog_data, new_pred)
print("R2:", new_r2)
print("MSE:", new_mse)

In [None]:
# Scatter of observed (x) against predicted (y) values for the newly
# available samples, on linear axes.
plt.scatter(new_endog_data, new_pred)

In [None]:
# Scatter of observed (x) against predicted (y) values for the uninterpolated
# test split; a low alpha makes overlapping points distinguishable.
plt.scatter(u_y_test, u_y_pred, alpha=0.1)
# Use logarithmic scaling on both axes.
plt.xscale("log")
plt.yscale("log")