## Setup

In [None]:
from specific import *

# Look up this experiment's name in `experiment_name_dict`, keyed by the name of
# the project directory. The result is used further down in the y-axis label of
# the observation/prediction histogram.
# NOTE(review): `experiment_name_dict` and `PROJECT_DIR` presumably come from the
# `specific` star import above - confirm.
exp_name = experiment_name_dict[PROJECT_DIR.name]

### Get shifted data

In [None]:
# Unpack the six objects returned by `get_offset_data()`.
# In the cells shown in this part of the notebook only `master_mask` is used
# afterwards (it is passed to `get_mm_data`); the other names are simply bound
# in the notebook namespace.
(
    endog_data,
    exog_data,
    master_mask,
    filled_datasets,
    masked_datasets,
    land_mask,
) = get_offset_data()

### Retrieve previous results from the 'model' notebook

In [None]:
# Reload the cached train/validation split and the fitted model.
X_train, X_val, y_train, y_val = data_split_cache.load()
rf = get_model()

# Pass the training targets, then the validation targets, through `get_mm_data`
# together with the master mask (same call order as writing the two calls out).
masked_train_data, masked_val_data = (
    get_mm_data(targets.values, master_mask, split_kind)
    for targets, split_kind in ((y_train, "train"), (y_val, "val"))
)

### Predict out-of-sample BA

In [None]:
# Let the model use as many workers as `get_ncpus()` reports.
n_threads = get_ncpus()
rf.n_jobs = n_threads

# Predict on the validation features under the threading backend, then pass the
# raw predictions through `get_mm_data` with the "val" split label.
with parallel_backend("threading", n_jobs=n_threads):
    val_predictions = rf.predict(X_val)
    predicted_ba = get_mm_data(val_predictions, master_mask, "val")

### Histogram of Predictions vs. Observations

In [None]:
# Extract the values of the predictions and observations via `get_unmasked`;
# both results must have the same shape for the comparisons below.
f_pred = get_unmasked(predicted_ba)
f_obs = get_unmasked(masked_val_data)
assert f_pred.shape == f_obs.shape

# Report the two means side by side.
mean_obs, mean_pred = (np.mean(flat_values) for flat_values in (f_obs, f_pred))
print(f"Mean out-of-sample obs.: {mean_obs:0.2e}")
print(f"Mean out-of-sample pred.: {mean_pred:0.2e}")

In [None]:
# Two panels sharing a y-axis: a narrow left panel for the single lowest bin and
# a wide right panel for everything above it.
fig, axes = plt.subplots(
    nrows=1,
    ncols=2,
    sharey=True,
    figsize=(7, 4),
    gridspec_kw={"width_ratios": [0.07, 1]},
)

# Upper edge of the single bin drawn in the left panel; it is reused as the
# first bin edge of the right panel.
lower_range_lim = 2.2e-5

# Left panel: one bin [0, lower_range_lim] with observations and predictions
# plotted as two side-by-side bars (one column per dataset).
axes[0].hist(
    np.hstack([flat_values.reshape(-1, 1) for flat_values in (f_obs, f_pred)]),
    bins=[0, lower_range_lim],
    histtype="bar",
    label=["Obs.", "Pred."],
)
axes[0].set_xlim(0, 2.1e-5)


@ticker.FuncFormatter
def major_formatter(x, pos):
    """Format a tick value as compact scientific notation.

    Values whose coefficient is (numerically) a whole number are rendered as
    mathtext, e.g. ``$10^{-5}$`` or ``$2 \\times 10^{-5}$``; every other value
    falls back to ``f"{x:0.1e}"``.

    Parameters
    ----------
    x : float
        Tick value.
    pos : int or None
        Tick position supplied by matplotlib; unused.

    Returns
    -------
    str
        The tick label. Always a string, including for zero (``"0"``) and for
        non-finite values (empty string).

    """
    if x == 0:
        return "0"
    if not math.isfinite(x):
        # log10 is undefined here; draw no label instead of raising.
        return ""

    # Work on the magnitude so that negative ticks do not hit log10 of a
    # negative number (NaN, on which math.floor raises).
    sign = "-" if x < 0 else ""
    magnitude = abs(x)

    exp = math.floor(np.log10(magnitude))
    coeff = magnitude / 10 ** exp

    # Compare with a tolerance: tick locations such as 3 * 1e-5 are frequently
    # off by one ulp, which an exact `is_integer()` check would reject.
    nearest = int(round(coeff))
    if math.isclose(coeff, nearest, rel_tol=1e-9):
        if nearest == 10:
            # Round-off pushed the coefficient up to 10; renormalise.
            nearest, exp = 1, exp + 1
        if nearest == 1:
            return fr"${sign}10^{{{exp}}}$"
        return fr"${sign}{nearest} \times 10^{{{exp}}}$"
    return f"{x:0.1e}"


left_ax, right_ax = axes

# Left panel: custom tick labels from `major_formatter`.
left_ax.xaxis.set_major_formatter(major_formatter)

# Right panel: 15 geometrically spaced bin edges from the left panel's upper
# edge up to the largest observed or predicted value.
largest_value = max(np.max(f_pred), np.max(f_obs))
log_bin_edges = np.geomspace(lower_range_lim, largest_value, num=15)
right_ax.hist(
    np.hstack((f_obs.reshape(-1, 1), f_pred.reshape(-1, 1))),
    bins=log_bin_edges,
    histtype="bar",
    label=["Obs.", "Pred."],
)
right_ax.set_xscale("log")
right_ax.set_xlim(2e-5, 1)

# Shared appearance of both panels: logarithmic counts, light dashed grid and a
# fixed count range.
for ax in axes:
    ax.set_yscale("log")
    ax.grid(alpha=0.4, linestyle="--")
    ax.set_ylim(3e1, 6e5)

right_ax.legend(loc="best")

left_ax.set_ylabel(f"counts (out-of-sample, {exp_name} model)")
right_ax.set_xlabel("BA")

# Negative horizontal padding pulls the two panels together.
fig.tight_layout(w_pad=-1.6)
figure_saver.save_figure(fig, "obs_pred_hist", sub_directory="predictions")

In [None]:
with figure_saver("obs_pred_comp", sub_directory="predictions"):
    # Mean over the first axis of (observations - predictions).
    mean_residual = np.mean(masked_val_data - predicted_ba, axis=0)

    # Asymmetric colour boundaries around the zero midpoint.
    residual_boundaries = [-0.01, -0.001, -1e-4, 0, 0.001, 0.01, 0.02]

    # The figure is created inside the `with` block, as before.
    cube_plotting(
        mean_residual,
        fig=plt.figure(figsize=(5.1, 2.3)),
        title="",
        cmap="brewer_RdYlBu_11",
        cmap_midpoint=0,
        cmap_symmetric=False,
        boundaries=residual_boundaries,
        coastline_kwargs={"linewidth": 0.3},
        colorbar_kwargs={
            "format": ticker.FuncFormatter(lambda x, pos: simple_sci_format(x)),
            "pad": 0.02,
            "label": "BA Obs. - Pred. (out-of-sample)",
        },
    )

In [None]:
# Mean residual (obs. - pred.) divided by the mean observation, both means taken
# over the first axis.
residual_mean = np.mean(masked_val_data - predicted_ba, axis=0)
observed_mean = np.mean(masked_val_data, axis=0)

cube_plotting(
    residual_mean / observed_mean,
    cmap="brewer_RdYlBu_11",
    cmap_midpoint=0,
    cmap_symmetric=False,
    boundaries=[-5, -4, -3, -2, 0, 1e-2, 1e-1],
)

In [None]:
# Hand the out-of-sample predictions, the matching observations and the figure
# saver to the project's `ba_plotting` helper.
# NOTE(review): presumably this draws and saves the comparison figures via
# `figure_saver` - confirm against the helper's definition.
ba_plotting(predicted_ba, masked_val_data, figure_saver)

### Example timeseries

In [None]:
# Select the locations (all axes after the first) to draw example series for:
#   * the prediction's underlying data exceed 0.1 somewhere along axis 0, and
#   * more than 4 entries along axis 0 are unmasked.
# An earlier, looser criterion required only more than 5 unmasked entries.
# NOTE(review): the first test reads `.data`, so values under the mask also
# count towards the maximum - confirm this is intended.
has_large_prediction = np.max(predicted_ba.data, axis=0) > 0.1
has_enough_samples = np.sum(~predicted_ba.mask, axis=0) > 4
valid_indices = list(zip(*np.where(has_large_prediction & has_enough_samples)))
print("Nr. valid:", len(valid_indices))

# Draw at most 100 distinct locations with a fixed seed so that the same
# examples are shown on every run.
n_examples = min(100, len(valid_indices))
example_order = np.random.RandomState(0).choice(
    len(valid_indices), n_examples, replace=False
)

for i in example_order:
    # Full extent along axis 0 at the chosen location.
    series_index = (slice(None), *valid_indices[i])

    plt.figure(figsize=(7, 3))
    plt.plot(masked_val_data[series_index], label="obs", marker="o")
    plt.plot(predicted_ba[series_index], label="pred", marker="x")
    plt.legend(loc="best")