## Setup

In [None]:
from specific import *

# Both savers are configured with the same output sub-directory.
# Note that each name is rebound to the object returned by the call, so
# re-running this cell would call those returned objects instead of the
# originals.
_FIGURE_SUB_DIRECTORY = "train_val_set"
figure_saver = figure_saver(sub_directory=_FIGURE_SUB_DIRECTORY)
map_figure_saver = map_figure_saver(sub_directory=_FIGURE_SUB_DIRECTORY)

In [None]:
# get_offset_data() yields six values; bind them to the notebook-level names
# used by the cells below.
_offset_data = get_offset_data()
(
    endog_data,
    exog_data,
    master_mask,
    filled_datasets,
    masked_datasets,
    land_mask,
) = _offset_data

In [None]:
# Load the stored split; the four returned values are unpacked as the
# train/validation predictors (X_*) and targets (y_*).
X_train, X_val, y_train, y_val = data_split_cache.load()
# NOTE(review): presumably the fitted random forest model - confirm against
# get_model().
rf = get_model()

### BA in the train and validation sets

Valid elements are located where `master_mask` is `False`.

In [None]:
def _scatter_to_masked(flat_indices, values, mask_template):
    """Return a masked array holding `values` at `flat_indices` only.

    The result has the shape of `mask_template`. It starts out as float64
    zeros with every element masked, and then receives `values` at the given
    flattened positions.
    """
    scattered = np.ma.MaskedArray(
        np.zeros_like(mask_template, dtype=np.float64),
        mask=np.ones_like(mask_template),
    )
    # The assignment goes through the raveled array, so it only reaches
    # `scattered` because ravel() hands back a view of its data and mask.
    scattered.ravel()[flat_indices] = values
    return scattered


# Flattened positions of the valid elements, i.e. where master_mask is False.
valid_indices = np.flatnonzero(~master_mask)

# NOTE(review): for the index/value pairing below to be meaningful, this split
# presumably has to reproduce the one behind the cached y_train / y_val -
# confirm the arguments match.
split_options = {"test_size": 0.3, "shuffle": True, "random_state": 1}
valid_train_indices, valid_val_indices = train_test_split(
    valid_indices, **split_options
)

masked_train_data = _scatter_to_masked(
    valid_train_indices, y_train.values, master_mask
)
masked_val_data = _scatter_to_masked(valid_val_indices, y_val.values, master_mask)

In [None]:
# One entry per figure: (output file name, plot title, flat indices to flag).
mask_plot_specs = (
    ("train_set_mask", "Train Set Mask", valid_train_indices),
    ("val_set_mask", "Validation Set Mask", valid_val_indices),
)

for fname, title, indices in mask_plot_specs:
    with map_figure_saver(fname):
        # Start from zeros (in master_mask's dtype) carrying a copy of
        # master_mask as the mask, then flag the selected positions with 1.
        mask = np.ma.MaskedArray(np.zeros_like(master_mask), mask=master_mask.copy())
        mask.ravel()[indices] = 1

        colorbar_options = {"label": "Fraction Present"}
        cube_plotting(
            mask,
            title=title,
            nbins=12,
            cmap="viridis",
            colorbar_kwargs=colorbar_options,
        )

In [None]:
with map_figure_saver("train_val_set_overall_ba_comp"):
    # Three vertically stacked map panels sharing one set of plotting options.
    fig, axes = plt.subplots(
        nrows=3,
        ncols=1,
        constrained_layout=True,
        figsize=(5.1, 8.4),
        subplot_kw={"projection": ccrs.Robinson()},
    )
    shared_kwargs = dict(
        boundaries=[0, 4e-6, 1e-5, 1e-4, 1e-3, 1e-2, 8e-2],
        extend="max",
        cmap="inferno",
        colorbar_kwargs={"format": "%0.1e", "label": "Fractional BA"},
        coastline_kwargs={"linewidth": 0.3},
        # The panel titles are set on the axes directly, so an empty title is
        # passed to cube_plotting.
        title="",
    )
    # (panel title, data) in top-to-bottom order.
    panels = (
        ("Mean Overall GFED4 BA", get_masked_array(endog_data.values, master_mask)),
        ("Mean Train Set GFED4 BA", masked_train_data),
        ("Mean Validation Set GFED4 BA", masked_val_data),
    )
    for panel_ax, (panel_title, panel_data) in zip(axes, panels):
        panel_ax.set_title(panel_title)
        cube_plotting(panel_data, ax=panel_ax, fig=fig, **shared_kwargs)

In [None]:
with map_figure_saver("train_val_set_difference"):
    # Per-location means along the first axis of each set, then their
    # difference (train minus validation).
    train_mean_ba = np.mean(masked_train_data, axis=0)
    val_mean_ba = np.mean(masked_val_data, axis=0)
    cube_plotting(
        train_mean_ba - val_mean_ba,
        cmap="RdBu_r",
        nbins=9,
        log=True,
        min_edge=1e-2,
        cmap_midpoint=0,
        cmap_symmetric=True,
        colorbar_kwargs={"format": "%0.1e", "label": "Fractional BA"},
        coastline_kwargs={"linewidth": 0.3},
        title="<Train> - <Validation>",
        fig=plt.figure(figsize=(5.1, 2.8)),
    )

In [None]:
with map_figure_saver("train_val_set_rel_difference"):
    # Difference of the per-location means (train minus validation) along the
    # first axis, normalised by the corresponding mean of the overall data.
    mean_difference = np.mean(masked_train_data, axis=0) - np.mean(
        masked_val_data, axis=0
    )
    overall_mean = np.mean(get_masked_array(endog_data.values, master_mask), axis=0)
    cube_plotting(
        mean_difference / overall_mean,
        cmap="RdBu_r",
        nbins=9,
        log=True,
        min_edge=1e-1,
        cmap_midpoint=0,
        cmap_symmetric=True,
        # The plotted quantity is a ratio of two BA means, so it is labelled
        # as a relative difference rather than as a BA value.
        colorbar_kwargs={"format": "%0.1e", "label": "Relative Difference"},
        coastline_kwargs={"linewidth": 0.3},
        title="(<Train> - <Validation>) / <GFED4>",
        fig=plt.figure(figsize=(5.1, 2.8)),
    )

In [None]:
# Difference between the means taken over all unmasked elements of each set.
overall_mean_difference = np.mean(masked_train_data) - np.mean(masked_val_data)
print("<Train> - <Validation>:", overall_mean_difference)