In [None]:
from pathlib import Path
import pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Directory (relative to the working directory) that holds the pickled
# checkpoints read by the loading cells below.
out = Path("out")

In [None]:
# Load the checkpoint stored in "mc_red.p" and reshape its "fit" array into a
# long-format table with columns: model, length, fit.
# NOTE: pickle.load can execute arbitrary code; only load checkpoints that come
# from a trusted source.
with open(out / "mc_red.p", "rb") as ckpt_file:  # context manager closes the handle
    ckpt_red_20 = pickle.load(ckpt_file)

# After the transpose each column corresponds to one entry of "train_lens";
# column names are the lengths converted to strings. Rows are presumably the
# individual Monte Carlo runs -- TODO confirm against the script that wrote it.
df_red_20 = pd.DataFrame(
    ckpt_red_20["fit"].T,
    columns=[str(train_len) for train_len in ckpt_red_20["train_lens"]],
)
# Wide -> long: one row per (length, fit) pair.
df_red_20 = df_red_20.melt(var_name="length", value_name="fit")
# Tag every row with the model name as the first column.
df_red_20.insert(0, "model", "reduced 20")

In [None]:
# Load the checkpoint stored in "mc_red_10.p" and reshape its "fit" array into
# a long-format table with columns: model, length, fit.
# NOTE: pickle.load can execute arbitrary code; only load checkpoints that come
# from a trusted source.
with open(out / "mc_red_10.p", "rb") as ckpt_file:  # context manager closes the handle
    ckpt_red_10 = pickle.load(ckpt_file)

# After the transpose each column corresponds to one entry of "train_lens";
# column names are the lengths converted to strings. Rows are presumably the
# individual Monte Carlo runs -- TODO confirm against the script that wrote it.
df_red_10 = pd.DataFrame(
    ckpt_red_10["fit"].T,
    columns=[str(train_len) for train_len in ckpt_red_10["train_lens"]],
)
# Wide -> long: one row per (length, fit) pair.
df_red_10 = df_red_10.melt(var_name="length", value_name="fit")
# Tag every row with the model name as the first column.
df_red_10.insert(0, "model", "reduced 10")

In [None]:
# Load the checkpoint stored in "mc_full.p" and reshape its "fit_bfgs" array
# into a long-format table with columns: model, length, fit.
# NOTE: pickle.load can execute arbitrary code; only load checkpoints that come
# from a trusted source.
with open(out / "mc_full.p", "rb") as ckpt_file:  # context manager closes the handle
    ckpt_full = pickle.load(ckpt_file)

# Unlike the reduced checkpoints, the values are read from the "fit_bfgs" key.
# After the transpose each column corresponds to one entry of "train_lens";
# column names are the lengths converted to strings. Rows are presumably the
# individual Monte Carlo runs -- TODO confirm against the script that wrote it.
df_full = pd.DataFrame(
    ckpt_full["fit_bfgs"].T,
    columns=[str(train_len) for train_len in ckpt_full["train_lens"]],
)
# Wide -> long: one row per (length, fit) pair.
df_full = df_full.melt(var_name="length", value_name="fit")
# Tag every row with the model name as the first column.
df_full.insert(0, "model", "full")

In [None]:
# Stack the three per-model long tables into one frame; ignore_index gives the
# result a fresh 0..N-1 row index instead of repeating each table's own index.
mc_frames = [df_red_20, df_red_10, df_full]
df_mc = pd.concat(mc_frames, ignore_index=True)

In [None]:
# Reference numbers for the linear baseline.
# NOTE(review): the original note on rms_y reads "1e5 rmse of y"; presumably the
# value carries a 1e5 scale factor (the cell output divides by 1e5) -- confirm.
rms_y = 66.69
# FIT (%) of a linear baseline obtained with MATLAB's sysid toolbox.
fit_lin = 77.17

# Convert the baseline FIT percentage to an RMSE: (1 - FIT/100) * rms_y.
lin_residual_fraction = 1 - fit_lin / 100.0
rmse_lin = lin_residual_fraction * rms_y

# Displayed as the cell output.
rmse_lin / 1e5

In [None]:
# Clamp negative FIT values to zero, then derive an RMSE column from the
# clamped FIT with the same (1 - FIT/100) * rms_y conversion used for the
# linear baseline.
fit_clamped = np.maximum(df_mc["fit"], 0.0)
df_mc["fit"] = fit_clamped
df_mc["rmse"] = (1 - fit_clamped / 100.0) * rms_y

In [None]:
# Restrict the table to a fixed set of training lengths. The "length" column
# holds strings (it was built from str(...) column names), hence string literals.
lengths_to_keep = [
    "100", "200", "400", "600", "800",
    "1000", "2000", "3000", "4000", "5000",
]
keep_mask = df_mc["length"].isin(lengths_to_keep)
df_mc = df_mc[keep_mask]

In [None]:
# Box plot of the test FIT per training length, one box per model, with two
# horizontal reference lines, saved to fig/boxplot.pdf.

# Legend label for each value of the "model" column, listed in the order the
# boxes should appear within each group. Raw strings keep the LaTeX "\phi"
# intact instead of relying on Python passing through the invalid "\p" escape
# (which emits a warning on recent Python versions).
model_labels = {
    "full": "full-order",
    "reduced 20": r"reduced, $n_\phi$ = 20",
    "reduced 10": r"reduced, $n_\phi$ = 10",
    # Add more mappings as needed
}
# Not used by this cell; kept unchanged in case another cell references it.
model_order = ["full", "reduced"]

# FIT (%) drawn as the "Full-order 40960 samples" reference line.
fit_full_ref = 98.9

fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(
    df_mc,
    x="length",
    y="fit",
    hue=df_mc["model"].map(model_labels),   # display labels instead of raw model keys
    hue_order=list(model_labels.values()),  # single source of truth for label order
    ax=ax,
)
ax.tick_params(axis="x", labelrotation=45)
ax.set_xlabel("Training sequence length (-)")
ax.set_ylabel("Test FIT (%)")
ax.axhline(y=fit_lin, color="black", linestyle="dotted", alpha=0.5, label="LTI 40960 samples")
ax.axhline(y=fit_full_ref, color="black", linestyle="dashed", alpha=0.5, label="Full-order 40960 samples")
# Rebuild the legend so it also lists the two reference lines added above.
ax.legend(loc="lower right")
ax.set_ylim(-5, 100)
# Run the layout once, after all labels and the legend exist.
fig.tight_layout()

# Create the output directory if needed so savefig does not fail on a fresh checkout.
fig_dir = Path("fig")
fig_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(fig_dir / "boxplot.pdf")

In [None]:
# Per-(model, length) summary statistics of every remaining column of df_mc.
# The result has a (model, length) row index and (column, statistic) columns.
group_keys = ["model", "length"]
summary_stats = ["mean", "median", "std"]
df_stats = df_mc.groupby(group_keys).agg(summary_stats)

# Displayed as the cell output.
df_stats

In [None]:
# Draft of a mean +/- std plot per model built from df_stats. Only an empty
# figure is created here; every plotting call below is disabled.
# NOTE(review): the disabled lines look up df_stats with the key "reduced", but
# the "model" column built in the loading cells only contains "reduced 20",
# "reduced 10" and "full", so they would need updating before being re-enabled.
plt.figure()
#plt.plot(df_stats.loc["full"]["fit"]["mean"])
#plt.fill_between(df_stats.loc["full"]["fit"]["mean"].index, df_stats.loc["full"]["fit"]["mean"] - df_stats.loc["full"]["fit"]["std"], df_stats.loc["full"]["fit"]["mean"] + df_stats.loc["full"]["fit"]["std"], alpha=0.3)
#plt.fill_between(df_stats.loc["reduced"]["fit"]["mean"].index, df_stats.loc["reduced"]["fit"]["mean"] - df_stats.loc["reduced"]["fit"]["std"], df_stats.loc["reduced"]["fit"]["mean"] + df_stats.loc["reduced"]["fit"]["std"], alpha=0.3)

#plt.plot(df_stats.loc["reduced"]["fit"]["mean"])