In [None]:
from pathlib import Path
import pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Matplotlib rcParams overrides used for every figure in this notebook.
tex_fonts = {
    # Typeset all figure text with LaTeX, using a serif family.
    "text.usetex": True,
    "font.family": "serif",
    # Axis labels and default text are set to size 12.
    **dict.fromkeys(("axes.labelsize", "font.size"), 12),
    # Legend entries and tick labels are slightly smaller, at size 10.
    **dict.fromkeys(("legend.fontsize", "xtick.labelsize", "ytick.labelsize"), 10),
}

In [None]:
# Push the font overrides in `tex_fonts` into matplotlib's global rcParams so
# that every figure created afterwards picks them up.
plt.rcParams.update(tex_fonts) # use latex fonts
# Optional, currently disabled: draw a background grid on every axes.
#plt.rcParams.update({"axes.grid": True}) 

In [None]:
# `out` is where the pickled checkpoints are read from; `fig_dir` is where
# figures are written.
out, fig_dir = Path("out"), Path("fig")

# Create the figure directory if needed; an existing directory is left alone.
fig_dir.mkdir(exist_ok=True)

In [None]:
# Load the checkpoint for the "reduced 20" model.
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
# The context manager guarantees the file handle is closed after loading.
with open(out / "mc_red.p", "rb") as ckpt_file:
    ckpt_red_20 = pickle.load(ckpt_file)

# After the transpose there is one column per entry of "train_lens"
# (presumably one row per Monte Carlo run -- TODO confirm against the producer).
df_red_20 = pd.DataFrame(
    ckpt_red_20["fit"].T,
    columns=[str(train_len) for train_len in ckpt_red_20["train_lens"]],
)
# Reshape to long format: one row per (length, fit) observation.
df_red_20 = df_red_20.melt(var_name="length", value_name="fit")
# Tag every row with the model it belongs to.
df_red_20.insert(0, "model", "reduced 20")

In [None]:
# Load the checkpoint for the "reduced 10" model.
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
# The context manager guarantees the file handle is closed after loading.
with open(out / "mc_red_10.p", "rb") as ckpt_file:
    ckpt_red_10 = pickle.load(ckpt_file)

# After the transpose there is one column per entry of "train_lens"
# (presumably one row per Monte Carlo run -- TODO confirm against the producer).
df_red_10 = pd.DataFrame(
    ckpt_red_10["fit"].T,
    columns=[str(train_len) for train_len in ckpt_red_10["train_lens"]],
)
# Reshape to long format: one row per (length, fit) observation.
df_red_10 = df_red_10.melt(var_name="length", value_name="fit")
# Tag every row with the model it belongs to.
df_red_10.insert(0, "model", "reduced 10")

In [None]:
# Load the checkpoint for the "full" model.
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
# The context manager guarantees the file handle is closed after loading.
with open(out / "mc_full.p", "rb") as ckpt_file:
    ckpt_full = pickle.load(ckpt_file)

# This checkpoint stores its fit values under "fit_bfgs" rather than "fit".
# After the transpose there is one column per entry of "train_lens"
# (presumably one row per Monte Carlo run -- TODO confirm against the producer).
df_full = pd.DataFrame(
    ckpt_full["fit_bfgs"].T,
    columns=[str(train_len) for train_len in ckpt_full["train_lens"]],
)
# Reshape to long format: one row per (length, fit) observation.
df_full = df_full.melt(var_name="length", value_name="fit")
# Tag every row with the model it belongs to.
df_full.insert(0, "model", "full")

In [None]:
# Stack the three per-model long tables into a single frame with a fresh
# consecutive index.
df_mc = pd.concat(
    [df_red_20, df_red_10, df_full],
    ignore_index=True,
)

In [None]:
# RMS of the output signal y; per the original note this value is scaled by
# 1e5 (presumably the true RMS is rms_y / 1e5 -- TODO confirm).
rms_y = 66.69
# FIT (%) of a linear baseline obtained with MATLAB's sysid toolbox.
fit_lin = 77.17
# Convert the baseline FIT to an RMSE on the same scale as rms_y:
# rmse = (1 - fit / 100) * rms_y.
rmse_lin = rms_y * (1 - fit_lin / 100.0)
# Show the baseline RMSE with the 1e5 scale factor removed.
rmse_lin / 1e5

In [None]:
# Floor the FIT at 0 so that negative values are reported as 0.
df_mc["fit"] = np.maximum(df_mc["fit"], 0.0)
# Derive the RMSE from the clipped FIT, on the same scale as rms_y.
df_mc["rmse"] = rms_y * (1 - df_mc["fit"] / 100.0)

In [None]:
# Training lengths kept for the analysis, as the string labels produced when
# the checkpoints were melted.
kept_lengths = ["100", "200", "400", "500", "600", "800", "1000", "2000", "3000", "4000", "5000"]

# Compare on the string form so the cell can be re-run safely: after the first
# run "length" is already int, and a plain isin() against the string labels
# would match nothing and empty the frame.
# Building a new frame with assign()/sort_values() also avoids writing into a
# filtered slice (SettingWithCopyWarning) and in-place sorting.
df_mc = (
    df_mc[df_mc["length"].astype(str).isin(kept_lengths)]
    .assign(length=lambda frame: frame["length"].astype(int))  # numeric sort and axis order
    .sort_values(by=["model", "length"])
)

In [None]:
# Legend text for each value found in df_mc["model"].
model_labels = {
    "full": r"Full-order",
    "reduced 20": r"Reduced-order, $n_\phi$ = 20",
    "reduced 10": r"Reduced-order, $n_\phi$ = 10",
}
# Order of the hue groups, using the keys that actually occur in
# df_mc["model"]. The previous value listed a non-existent "reduced" key and
# was never used.
model_order = ["full", "reduced 20", "reduced 10"]

fig, ax = plt.subplots(1, 1, figsize=(8, 4))

# Horizontal reference levels, labelled in the legend as results obtained
# with 40960 samples: 98.9 for the full-order model and fit_lin for the
# linear (LTI) baseline.
ax.axhline(y=98.9, color="black", linestyle="dashed", alpha=0.5, label="Full-order 40960 samples")
ax.axhline(y=fit_lin, color="black", linestyle="dotted", alpha=0.5, label="LTI 40960 samples")

# One box per (training length, model). `data` is passed by keyword so the
# call also works on seaborn versions where it is keyword-only.
sns.boxplot(
    data=df_mc,
    x="length",
    y="fit",
    hue=df_mc["model"].map(model_labels),
    hue_order=[model_labels[key] for key in model_order],
    ax=ax,
)

ax.tick_params(axis="x", labelrotation=45)
ax.set_xlabel(r"Training sequence length (-)")
ax.set_ylabel(r"Test FIT (\%)")
ax.legend(loc="lower right", bbox_to_anchor=(1, 0.1))
ax.set_ylim(-5, 100)

# A single layout pass after all labels and the legend are in place.
fig.tight_layout()

fig.savefig(fig_dir / "boxplot.pdf")

In [None]:
# Median of the fit and rmse columns for every (model, training length) pair.
(
    df_mc
    .groupby(["model", "length"])
    .median()
)

In [None]:
# Count, per (model, training length), the runs whose clipped FIT is below 1.0;
# these are treated as failed runs.
(
    df_mc
    .loc[df_mc["fit"].lt(1.0)]
    .groupby(["model", "length"])
    .count()
)

In [None]:
# Mean, median and standard deviation of each metric column, grouped by model
# and training length; the result has two-level columns (metric, statistic).
df_stats = (
    df_mc
    .groupby(["model", "length"])
    .agg(["mean", "median", "std"])
)
df_stats

In [None]:
# Opens a new, empty figure. All plotting calls below are commented out, so
# nothing is drawn in it as the cell stands.
# NOTE(review): the disabled code indexes df_stats with "reduced", while the
# models in this notebook are labelled "reduced 20" and "reduced 10"; the key
# would need updating before re-enabling these lines.
plt.figure()
#plt.plot(df_stats.loc["full"]["fit"]["mean"])
#plt.fill_between(df_stats.loc["full"]["fit"]["mean"].index, df_stats.loc["full"]["fit"]["mean"] - df_stats.loc["full"]["fit"]["std"], df_stats.loc["full"]["fit"]["mean"] + df_stats.loc["full"]["fit"]["std"], alpha=0.3)
#plt.fill_between(df_stats.loc["reduced"]["fit"]["mean"].index, df_stats.loc["reduced"]["fit"]["mean"] - df_stats.loc["reduced"]["fit"]["std"], df_stats.loc["reduced"]["fit"]["mean"] + df_stats.loc["reduced"]["fit"]["std"], alpha=0.3)

#plt.plot(df_stats.loc["reduced"]["fit"]["mean"])