In [1]:
import sys

import numpy as np
import numpy.typing as npt
import pandas as pd

sys.path.append("..")
from NGS.data import preprocess
from path import DATA_DIR, RESULT_DIR

# Pickle file stems per system; get_maes loads each one as f"{name}.pkl".
# The trailing "_int" / "_ext" on the test sets marks graph-domain
# interpolation / extrapolation.
heat_file_names = ["heat_train", "heat_test_int", "heat_test_ext"]
rossler_file_names = ["rossler_train", "rossler_test_int", "rossler_test_ext"]


In [2]:
def get_mae(
    pred_trajectories: list[npt.NDArray[np.float32]],
    true_trajectories: list[npt.NDArray[np.float32]],
) -> list[float]:
    """Return one mean absolute error per (prediction, ground truth) pair.

    The two sequences are walked in lockstep; the i-th result is the mean of
    ``|pred - true|`` over every element of the i-th pair, converted to a
    plain Python float.

    Note that pairing stops at the shorter of the two sequences, so surplus
    trajectories on either side are ignored rather than reported.
    """
    errors: list[float] = []
    for predicted, target in zip(pred_trajectories, true_trajectories):
        abs_diff = np.abs(predicted - target)
        errors.append(abs_diff.mean().item())
    return errors


def get_maes(system: str, missing: float, noise: float) -> dict[str, list[float]]:
    """Compute per-trajectory MAEs for every split of one experiment.

    Ground truth is read from ``DATA_DIR / f"{file_name}.pkl"`` and predictions
    from ``RESULT_DIR / f"{system}_p{missing}_s{noise}" / f"{file_name}.pkl"``.

    Args:
        system: ``"heat"`` selects the heat file names and a time split index
            of 21; any other value falls back to the Rossler file names and a
            split index of 41.
        missing: Value formatted after ``_p`` in the experiment directory name.
        noise: Value formatted after ``_s`` in the experiment directory name.

    Returns:
        A dict mapping a split key to a list of MAEs, one per trajectory:

        * ``"train"`` / ``"val"`` for the training file (first time step
          excluded);
        * ``"<key>_int"`` / ``"<key>_ext"`` for every other file, where
          ``<key>`` is the file name without its leading system prefix
          (e.g. ``"test_int"``). ``_int`` covers time steps
          ``1 .. split index - 1`` and ``_ext`` covers the split index onwards.
    """
    is_heat = system == "heat"
    file_names = heat_file_names if is_heat else rossler_file_names
    int_ext_idx = 21 if is_heat else 41  # boundary between time interpolation/extrapolation
    val_ratio = 0.2  # must agree between preprocess() and the prediction slicing below
    exp_id = f"{system}_p{missing}_s{noise}"
    result_dir = RESULT_DIR / exp_id
    maes: dict[str, list[float]] = {}

    for file_name in file_names:
        # Load data
        # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
        data_df = pd.read_pickle(DATA_DIR / f"{file_name}.pkl")
        pred_df = pd.read_pickle(result_dir / f"{file_name}.pkl")
        num_samples = len(pred_df)

        if "train" in file_name:
            train, val = preprocess(data_df, val_ratio=val_ratio)

            # Split predictions at an explicit boundary. Slicing with
            # ``[:-k]`` / ``[-k:]`` breaks when k == 0 because ``-0 == 0``:
            # the train slice becomes empty and the val slice becomes everything.
            # Assumes predictions are stored train-first, validation-last, in
            # the same proportions preprocess() uses -- TODO confirm.
            num_val = int(val_ratio * num_samples)
            num_train = num_samples - num_val

            train_true_trajectories = train["trajectories"]
            train_pred_trajectories = pred_df["trajectories"][:num_train]
            maes["train"] = get_mae(  # MAE except initial condition
                [traj[1:] for traj in train_pred_trajectories],
                [traj[1:] for traj in train_true_trajectories],
            )

            val_true_trajectories = val["trajectories"]
            val_pred_trajectories = pred_df["trajectories"][num_train:]
            maes["val"] = get_mae(  # MAE except initial condition
                [traj[1:] for traj in val_pred_trajectories],
                [traj[1:] for traj in val_true_trajectories],
            )
        else:
            # With the default arguments, the second element returned by
            # preprocess() is used as the test set.
            _, test = preprocess(data_df)
            true_trajectories = test["trajectories"]
            pred_trajectories = pred_df["trajectories"]

            # Time domain interpolation/extrapolation
            # Drop the leading system prefix: "heat_test_int" -> "test_int".
            key = "_".join(file_name.split("_")[1:])
            maes[key + "_int"] = get_mae(  # MAE except initial condition
                [traj[1:int_ext_idx] for traj in pred_trajectories],
                [traj[1:int_ext_idx] for traj in true_trajectories],
            )
            maes[key + "_ext"] = get_mae(
                [traj[int_ext_idx:] for traj in pred_trajectories],
                [traj[int_ext_idx:] for traj in true_trajectories],
            )
    return maes

In [3]:
# Experiment setting and per-trajectory MAEs for the heat system
missing, noise = 0.1, 0.001
maes = get_maes("heat", missing, noise)

# All errors are reported in units of 1e-4, i.e. multiplied by 1e4
scale = 1e4
report_rows = [
    # (printed label, key in maes, mean format, half-width format)
    # The train row keeps its own mean format, which differs from the others.
    ("train", "train", ".3e", ".2f"),
    ("validation", "val", ".2f", ".2f"),
    ("graph_int, time_int", "test_int_int", ".2f", ".2f"),
    ("graph_ext, time_int", "test_ext_int", ".2f", ".2f"),
    ("graph_int, time_ext", "test_int_ext", ".2f", ".2f"),
    ("graph_ext, time_ext", "test_ext_ext", ".2f", ".2f"),
]
scaled = {key: np.array(maes[key]) * scale for _, key, _, _ in report_rows}

# Keep the per-split arrays available under their usual names
train = scaled["train"]
val = scaled["val"]
graph_int_time_int = scaled["test_int_int"]
graph_ext_time_int = scaled["test_ext_int"]
graph_int_time_ext = scaled["test_int_ext"]
graph_ext_time_ext = scaled["test_ext_ext"]

# One "label<TAB>mean ± half-width" line per split, followed by a blank line.
# The half-width is 1.96 * std / sqrt(n).
for label, key, mean_fmt, ci_fmt in report_rows:
    errors = scaled[key]
    half_width = 1.96 * errors.std() / np.sqrt(len(errors))
    print(label, end="\t")
    print(f"{errors.mean():{mean_fmt}} ± {half_width:{ci_fmt}}", end="\n\n")

train	4.000e+00 ± 0.06

validation	3.95 ± 0.13

graph_int, time_int	3.98 ± 0.24

graph_ext, time_int	4.46 ± 0.33

graph_int, time_ext	5.39 ± 0.54

graph_ext, time_ext	5.73 ± 0.52



In [4]:
# Experiment setting and per-trajectory MAEs for the Rossler system
missing, noise = 0.1, 0.001
maes = get_maes("rossler", missing, noise)

# All errors are reported in units of 1e-1, i.e. multiplied by 1e1
scale = 1e1
report_rows = [
    # (printed label, key in maes, mean format, half-width format)
    # The train row is printed with four decimals, the others with two.
    ("train", "train", ".4f", ".4f"),
    ("validation", "val", ".2f", ".2f"),
    ("graph_int, time_int", "test_int_int", ".2f", ".2f"),
    ("graph_ext, time_int", "test_ext_int", ".2f", ".2f"),
    ("graph_int, time_ext", "test_int_ext", ".2f", ".2f"),
    ("graph_ext, time_ext", "test_ext_ext", ".2f", ".2f"),
]
scaled = {key: np.array(maes[key]) * scale for _, key, _, _ in report_rows}

# Keep the per-split arrays available under their usual names
train = scaled["train"]
val = scaled["val"]
graph_int_time_int = scaled["test_int_int"]
graph_ext_time_int = scaled["test_ext_int"]
graph_int_time_ext = scaled["test_int_ext"]
graph_ext_time_ext = scaled["test_ext_ext"]

# One "label<TAB>mean ± half-width" line per split, followed by a blank line.
# The half-width is 1.96 * std / sqrt(n).
for label, key, mean_fmt, ci_fmt in report_rows:
    errors = scaled[key]
    half_width = 1.96 * errors.std() / np.sqrt(len(errors))
    print(label, end="\t")
    print(f"{errors.mean():{mean_fmt}} ± {half_width:{ci_fmt}}", end="\n\n")

train	0.2194 ± 0.0155

validation	0.22 ± 0.03

graph_int, time_int	0.25 ± 0.08

graph_ext, time_int	0.34 ± 0.08

graph_int, time_ext	1.55 ± 0.72

graph_ext, time_ext	2.20 ± 0.68

