### Setup

In [None]:
from common import *

# Rebind `figure_saver` to the object returned by calling it with the
# "model_scores" sub-directory; its `save_figure` method is used at the end of
# the notebook to write the final figure.
# NOTE(review): this shadows the original callable (presumably provided by the
# star import from `common`), so re-running this cell without restarting the
# kernel calls the already-created object instead - confirm that is safe.
figure_saver = figure_saver(sub_directory="model_scores")

## Compare model performance scores

In [None]:
# Identifiers of the experiments whose scores are compared in this notebook.
# The list is handed to `load_experiment_data` in the next cell.
# NOTE(review): presumably these are the experiments' result-folder names -
# confirm against `common`.
exp_folders = [
    "lagged_lai_only",
    "lagged_vod_only",
    "15_most_important",
    "no_temporal_shifts",
    "lagged_fapar_only",
    "fapar_only",
    "lagged_sif_only",
    "sif_only",
    "all",
    "lai_only",
    "vod_only",
    "best_top_15",
]
# Quick sanity check: number of experiments, followed by their identifiers.
print(len(exp_folders))
pprint(exp_folders)

In [None]:
data = load_experiment_data(exp_folders, "model_scores")


def _scores_by_label(score_key):
    """Return `{experiment label: data[experiment][score_key]}` for all of `data`.

    Labels are looked up in `experiment_name_dict`; iteration follows the
    order of `data.items()`, so every mapping built here shares one key order.
    """
    return {
        experiment_name_dict[folder]: scores[score_key]
        for folder, scores in data.items()
    }


test_r2s = _scores_by_label("test_r2")
train_r2s = _scores_by_label("train_r2")

In [None]:
# Test R2 of the two experiments of particular interest, to 3 decimals.
for label in ("CURR", "BEST15"):
    print(f"{label}: {test_r2s[label]:0.3f}")

In [None]:
# Experiment labels, in the insertion order of `test_r2s`.
keys = [*test_r2s]

# Positions that order the test R2 scores from highest to lowest: argsort
# yields ascending order, which is then reversed.
ascending_order = np.argsort(list(test_r2s.values()))
indices = ascending_order[::-1]

In [None]:
# Tabulate both scores per experiment, rows ordered by `indices`
# (descending test R2), then append the train/test gap as a third column.
test_sorted = pd.Series(test_r2s).iloc[indices]
train_sorted = pd.Series(train_r2s).iloc[indices]

df = pd.DataFrame({"test R2": test_sorted, "train R2": train_sorted}).assign(
    **{"train - test R2": lambda scores: scores["train R2"] - scores["test R2"]}
)
df.round(3)

#### Sort by train - test gap

In [None]:
# Same table, ordered from the smallest to the largest train/test gap.
df.sort_values(by="train - test R2", ascending=True).round(decimals=3)

In [None]:
# Change in test R2 when moving from the second experiment to the first.
comp = ("ALL", "BEST15")
target, baseline = comp
delta = test_r2s[target] - test_r2s[baseline]
print(f"val R2 ({baseline} → {target}): {delta:0.3f}")

In [None]:
# Change in test R2 when moving from the second experiment to the first.
comp = ("BEST15", "CURR")
target, baseline = comp
delta = test_r2s[target] - test_r2s[baseline]
print(f"val R2 ({baseline} → {target}): {delta:0.3f}")

In [None]:
# Change in test R2 when moving from the second experiment to the first.
comp = ("ALL", "CURR")
target, baseline = comp
delta = test_r2s[target] - test_r2s[baseline]
print(f"val R2 ({baseline} → {target}): {delta:0.3f}")

In [None]:
# Change in train R2 when moving from the second experiment to the first.
comp = ("ALL", "CURR")
target, baseline = comp
delta = train_r2s[target] - train_r2s[baseline]
print(f"train R2 ({baseline} → {target}): {delta:0.3f}")

In [None]:
# Single-axes overview: validation and train R2 per experiment, with the
# experiments ordered by `indices` (descending validation R2).
fig, ax = plt.subplots(figsize=(6, 3.5))
ax.plot(
    np.array(list(test_r2s.values()))[indices],
    linestyle="",
    marker="o",
    c="C0",
    label="validation",
)
ax.plot(
    np.array(list(train_r2s.values()))[indices],
    linestyle="",
    marker="x",
    c="C1",
    label="train",
)
ax.grid(alpha=0.4, linestyle="--")
# One tick per experiment, labelled with the experiment name.
ax.set_xticks(list(range(len(test_r2s))))
ax.set_xticklabels(list(np.array(list(test_r2s))[indices]), rotation=45, ha="right")
ax.set_ylabel(r"$\mathrm{R}^2$")
# The `label=` values passed to plot() are only displayed once a legend is
# drawn; without it the two marker sets cannot be told apart.
# The trailing semicolon suppresses the Legend repr in the cell output.
ax.legend();

# This figure is intentionally not saved: the broken-axis version in the next
# cell is written under the name "model_comp_scores".

In [None]:
# Broken-y-axis version of the score comparison: train R2 is drawn on an upper
# panel and validation R2 on a lower panel, with slanted "cut" markers between
# them. The result is saved as "model_comp_scores".
# Adapted from: https://matplotlib.org/gallery/subplots_axes_and_figures/broken_axis.html
from operator import add, sub

# Scores ordered by `indices` (descending validation R2).
s_train_r2s = np.array(list(train_r2s.values()))[indices]
s_validation_r2s = np.array(list(test_r2s.values()))[indices]

# Ratio of training R2 range to validation R2 range.
# Used below as the height ratio of the two panels.
# NOTE(review): if all validation scores are equal (e.g. a single experiment),
# the denominator is 0 and the ratio is non-finite - confirm this cannot occur.
train_validation_ratio = np.ptp(s_train_r2s) / np.ptp(s_validation_r2s)

fig = plt.figure(figsize=(4, 2.2), dpi=200)

# Frameless, tick-less axes spanning the whole figure; it only carries the
# y-axis label shared by both panels.
all_ax = fig.add_subplot(1, 1, 1)
all_ax.set_ylabel(r"$\mathrm{R}^2$", labelpad=29)
all_ax.set_xticks([])
all_ax.set_yticks([])
all_ax.set_frame_on(
    False
)  # So we don't get black bars showing through the 'broken' gap.

# Break the y-axis into 2 parts.
# ax1 (top) shows the train scores, ax2 (bottom) the validation scores; they
# share the x-axis and their heights follow `train_validation_ratio`.
ax1, ax2 = fig.subplots(
    2, 1, sharex=True, gridspec_kw=dict(height_ratios=[train_validation_ratio, 1])
)
fig.subplots_adjust(hspace=0.05)  # adjust space between axes

# Plot train and validation R2s.

train_kwargs = dict(linestyle="", marker="x", c="C1", label="train")
ax1.plot(s_train_r2s, **train_kwargs)

validation_kwargs = dict(linestyle="", marker="o", c="C0", label="validation")
ax2.plot(s_validation_r2s, **validation_kwargs)

# Hard-coded ticks at 0.575, 0.600, ..., 0.675 (the + 0.01 keeps the end point
# inside the half-open arange interval).
# NOTE(review): these values look tuned to the current scores - revisit if the
# experiments change. Setting explicit ticks may also widen the view limits,
# so keep this call before the `get_ylim` calls below - confirm.
ax2.set_yticks(np.arange(0.575, 0.675 + 0.01, 0.025))

# Build the legend from proxy handles so that the train series (drawn on ax1)
# and the validation series both appear in one legend on ax2.
ax2.legend(
    handles=[
        Line2D([0], [0], **kwargs) for kwargs in (train_kwargs, validation_kwargs)
    ],
    loc="lower left",
)

ylim_1 = ax1.get_ylim()
ylim_2 = ax2.get_ylim()

# Pad the limits: the lower limit is decreased by margin_f[0] and the upper
# limit increased by margin_f[1], both relative to the top panel's y-range.
margin_f = (0.03, 0.04)  # Two-sided relative margin addition.
ax1.set_ylim(
    [
        op(ylim_val, factor * np.ptp(ylim_1))
        for ylim_val, factor, op in zip(ylim_1, margin_f, (sub, add))
    ]
)
# The bottom panel gets the same margins scaled down by the range ratio.
ax2.set_ylim(
    [
        op(ylim_val, factor * np.ptp(ylim_1) / train_validation_ratio)
        for ylim_val, factor, op in zip(ylim_2, margin_f, (sub, add))
    ]
)

# hide the spines between ax and ax2
ax1.spines["bottom"].set_visible(False)
ax2.spines["top"].set_visible(False)
ax1.xaxis.tick_top()
ax1.tick_params(labeltop=False)  # don't put tick labels at the top
ax1.xaxis.set_ticks_position("none")  # hide top ticks themselves (not just labels)

ax2.xaxis.tick_bottom()

# One tick per experiment, labelled with the experiment name in sorted order.
ax2.set_xticks(list(range(len(test_r2s))))
ax2.set_xticklabels(
    list(np.array(list(test_r2s))[indices]),
    rotation=45,
    ha="right",
)
ax2.tick_params(axis="x", which="major", pad=0)

# Now, let's turn towards the cut-out slanted lines.
# We create line objects in axes coordinates, in which (0,0), (0,1),
# (1,0), and (1,1) are the four corners of the axes.
# The slanted lines themselves are markers at those locations, such that the
# lines keep their angle and position, independent of the axes size or scale
# Finally, we need to disable clipping.

d = 0.5  # proportion of vertical to horizontal extent of the slanted line
kwargs = dict(
    marker=[(-1, -d), (1, d)],
    markersize=8,
    linestyle="none",
    color="k",
    mec="k",
    mew=1,
    clip_on=False,
)
# Markers on the bottom corners of the top panel and the top corners of the
# bottom panel.
ax1.plot([0, 1], [0, 0], transform=ax1.transAxes, **kwargs)
ax2.plot([0, 1], [1, 1], transform=ax2.transAxes, **kwargs)

# Same grid styling and x tick positions on both panels.
for ax in (ax1, ax2):
    ax.grid(alpha=0.4, linestyle="--")
    ax.set_xticks(list(range(len(test_r2s))))

figure_saver.save_figure(fig, "model_comp_scores")