In [27]:
#!/usr/bin/env python3
import os
import sys
import glob
import math
import torch
import numpy as np
import pandas as pd
from torch import nn
import matplotlib.pyplot as plt
from IPython.display import display

# =========================
# Project setup
# =========================
sys.path.append("../scripts")
sys.path.append("../utility")
from network import KoopmanNet

# Run identifier: selects the log directory that is read (../log/<project_name>)
# and names the output directory that summaries and plots are written to.
project_name = "Aug_8"
# Discount applied to successive prediction steps when the multi-step error
# is averaged in evaluate_model (step i is weighted by gamma**i).
gamma        = 0.8
device       = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Value of the "Ksteps" field in dataset file names (Polynomial uses 1 instead).
ksteps       = 15
# Normalization tag in dataset file names; global default, overridden per env below.
normalize    = "nonorm"  # global default; overridden per env below

# Encode-dim interpretation for FILTERING ONLY (loading is checkpoint-driven)
# - "multiplier": encode_dims are multipliers of state_dim (old behavior)
# - "absolute":   encode_dims are actual latent sizes
encode_dim_mode = "multiplier"

# =========================
# Experiments to run
# =========================
# Environments evaluated in this run. Other names that have been used here:
# "DampingPendulum", "Franka", "DoublePendulum", "Polynomial", "Kinova", "G1", "Go2".
envs        = ['Franka']  # add others as needed
# Encode dims kept when filtering log rows; interpreted per encode_dim_mode
# (used for FILTERING/LABELS only).
encode_dims = [1, 2, 4, 8, 16]      # interpreted per encode_dim_mode (for FILTERING/LABELS)
# Accepted values of the log's use_covariance_loss / use_control_loss columns.
cov_regs    = [0, 1]
ctrl_regs   = [0, 1]
# Only log rows whose seed is in this list are evaluated.
seeds       = [17382, 76849, 20965, 84902, 51194]
# Value of the "m" field that appears only in Polynomial dataset file names.
m           = 100

# =========================
# Default & per-env train sizes
# =========================
default_train_samples = [1000, 4000, 16000, 60000]


def adjust_train_samples(env: str, samples: list[int]) -> list[int]:
    """
    Rescale train-set sizes for the environments that use larger datasets.

    Sizes are multiplied by ``target / 60000`` and truncated to int, where the
    target is 200000 for "G1" and 140000 for "Go2". For every other
    environment the input list is returned unchanged (the same object).
    """
    largest_target = {"G1": 200000, "Go2": 140000}.get(env)
    if largest_target is None:
        return samples
    return [int(count * largest_target / 60000) for count in samples]

# Train-set sizes to look for, resolved once for every configured environment.
train_samples_per_env = dict(
    (env_name, adjust_train_samples(env_name, default_train_samples))
    for env_name in envs
)

# =========================
# Problem-specific mappings
# =========================
# Width of the control input for each environment; environments that are not
# listed are treated as having no control input by the lookups in this file.
u_dim_map = {
    "Franka":          7,
    "DoublePendulum":  2,
    "DampingPendulum": 1,
    "G1":              23,
    "Go2":             12,
    "Kinova":          7,
}

# Environments whose normalization tag differs from the global default.
normalize_override = {
    "G1":  "norm",
    "Go2": "norm",
}


def _normalize_used_for(env: str, normalize_default: str) -> str:
    """Return the normalization tag for `env`: its override if one exists, else the default."""
    if env in normalize_override:
        return normalize_override[env]
    return normalize_default

# =========================
# Dataset path finder
# =========================
def find_dataset_path(env: str,
                      normalize_default: str,
                      ksteps_val: int,
                      m_val: int,
                      candidate_Ntrains: list[int] | None = None) -> str | None:
    """
    Return the path of a dataset file for `env`, or None if none is found.

    Exact file names are probed first, one per candidate train size, in this
    order: `candidate_Ntrains` (if given), the per-env sizes from
    `train_samples_per_env` (falling back to `default_train_samples`), then
    the legacy size 60000. Duplicates are dropped, keeping the first
    occurrence. If no exact name exists, a glob with wildcards for the
    Ktrain/Kval/Ktest fields is tried and the lexicographically first match
    is returned.

    Polynomial datasets carry an extra ``m_<m_val>`` field in their file name;
    `m_val` is ignored for every other environment.
    """
    norm = _normalize_used_for(env, normalize_default)
    dataset_dir = os.path.join("..", "data", "datasets")

    # Everything up to (and including) "Ktrain_" is shared by both lookups.
    stem = f"dataset_{env}_{norm}_"
    if env == "Polynomial":
        stem += f"m_{m_val}_"
    stem += "Ktrain_"
    tail = f"_Ksteps_{ksteps_val}.pt"

    ordered_sizes = []
    if candidate_Ntrains:
        ordered_sizes.extend(candidate_Ntrains)
    ordered_sizes.extend(train_samples_per_env.get(env, default_train_samples))
    ordered_sizes.append(60000)  # legacy fallback

    # dict.fromkeys drops repeats while keeping first-seen order.
    for n_train in dict.fromkeys(ordered_sizes):
        candidate = os.path.join(
            dataset_dir, f"{stem}{n_train}_Kval_20000_Ktest_20000{tail}"
        )
        if os.path.exists(candidate):
            return candidate

    # Glob fallback (handles naming drift in the split sizes).
    hits = sorted(glob.glob(os.path.join(dataset_dir, f"{stem}*_Kval_*_Ktest_*{tail}")))
    if hits:
        return hits[0]
    return None

# =========================
# Helpers
# =========================
def env_has_control(env: str) -> bool:
    """Tell whether `env` has a positive control dimension in `u_dim_map`."""
    control_width = u_dim_map.get(env, 0)
    if not control_width:
        # Missing, None and 0 all mean "no control input".
        return False
    return control_width > 0

def _mode_matches(row_mode: str, selected_mode: str) -> bool:
    """
    Check a log row's encode_dim_mode against the selected mode.

    A missing row mode (None, empty string or NaN) always matches, and so does
    any selected mode other than "multiplier" / "absolute". Comparison is
    case-insensitive and ignores surrounding whitespace.
    """
    if row_mode is None or row_mode == "" or pd.isna(row_mode):
        return True

    accepted_by_mode = {
        "multiplier": ("times_input_dim", "per_state"),
        "absolute": ("absolute",),
    }
    logged = str(row_mode).strip().lower()
    accepted = accepted_by_mode.get(selected_mode.strip().lower())
    if accepted is None:
        return True
    return logged in accepted

def evaluate_model(model, data, u_dim, gamma, state_dim, device):
    """
    Roll the model forward along a trajectory batch and score it.

    `data` is indexed as ``data[step, sample, feature]``. When `u_dim` is a
    positive integer, the first `u_dim` features of each step are the control
    input and the remaining features are the state; when `u_dim` is None or 0,
    every feature is state and the model is stepped with ``None`` as control.

    Args:
        model: object exposing ``eval()``, ``encode(x)`` and ``forward(X, u)``.
        data: trajectory tensor with at least two steps along axis 0.
        u_dim: number of leading control features, or None/0 for no control.
        gamma: per-step discount; step ``i`` is weighted by ``gamma**i``.
        state_dim: number of leading columns of the lifted state that are
            compared with the target state.
        device: device the per-step slices are moved to.

    Returns:
        ``(weighted_error, normalized_cov_loss)`` as Python floats.
        `weighted_error` is the gamma-weighted average of the per-step MSE.
        `normalized_cov_loss` is the squared Frobenius norm of the off-diagonal
        covariance of the initial encoding (columns ``state_dim:``), divided by
        ``d * (d - 1)`` when the encoding width ``d`` exceeds 1.

    Raises:
        ValueError: if `data` has fewer than two steps, since there is then no
            transition to score (previously this surfaced as an opaque
            ZeroDivisionError).
    """
    steps = data.shape[0]
    if steps < 2:
        raise ValueError(
            f"evaluate_model needs at least 2 time steps, got {steps}."
        )

    has_control = not (u_dim is None or u_dim == 0)
    mse = nn.MSELoss()  # built once instead of once per step

    model.eval()
    with torch.no_grad():
        initial = data[0, :, u_dim:] if has_control else data[0]
        X = model.encode(initial.to(device))
        # Kept aside before the rollout overwrites X.
        encoded_initial = X[:, state_dim:]

        weighted_loss = 0.0
        beta, beta_sum = 1.0, 0.0

        for i in range(steps - 1):
            if has_control:
                X      = model.forward(X, data[i, :, :u_dim].to(device))
                target = data[i + 1, :, u_dim:].to(device)
            else:
                X      = model.forward(X, None)
                target = data[i + 1].to(device)

            weighted_loss += beta * mse(X[:, :state_dim], target)
            beta_sum      += beta
            beta          *= gamma

        weighted_loss /= beta_sum

        # covariance on initial encoding (for reference)
        z_centered = encoded_initial - encoded_initial.mean(dim=0, keepdim=True)
        cov_matrix = (z_centered.t() @ z_centered) / (z_centered.size(0) - 1)
        off_diag   = cov_matrix - torch.diag(torch.diag(cov_matrix))
        cov_loss   = torch.norm(off_diag, p='fro') ** 2
        encode_dim = X.shape[1] - state_dim
        if encode_dim > 1:
            normalized_cov_loss = cov_loss.item() / (encode_dim * (encode_dim - 1))
        else:
            normalized_cov_loss = cov_loss.item()

    return float(weighted_loss.item()), float(normalized_cov_loss)

def build_model_from_checkpoint(chkpt, state_dim, u_dim_hint, device):
    """
    Reconstruct a KoopmanNet whose shapes are read from the checkpoint itself.

    The lifted dimension is the side length of the square ``lA.weight`` matrix
    (or, failing that, of the first square 2-D entry whose key ends with
    ``lA.weight``). The control dimension is the second axis of ``lB.weight``
    when that entry is 2-D; otherwise `u_dim_hint` is used. A warning is
    printed when the hint and the resolved control dimension differ.

    Returns:
        ``(model, trained_u_dim, enc_dim_actual, trained_Nkoop)`` where
        ``enc_dim_actual = trained_Nkoop - state_dim``.

    Raises:
        RuntimeError: if no square ``lA.weight`` can be found.
    """
    def _is_square_matrix(tensor):
        return tensor.ndim == 2 and tensor.shape[0] == tensor.shape[1]

    layers = chkpt["layer"]
    sd     = chkpt["model"]

    # Lifted dimension: prefer the top-level lA, else any suffixed square one.
    if "lA.weight" in sd and _is_square_matrix(sd["lA.weight"]):
        trained_Nkoop = sd["lA.weight"].shape[0]
    else:
        square_candidates = [
            weight for name, weight in sd.items()
            if name.endswith("lA.weight") and _is_square_matrix(weight)
        ]
        if not square_candidates:
            raise RuntimeError("Cannot infer Nkoop: missing or non-square lA.weight in checkpoint.")
        trained_Nkoop = square_candidates[0].shape[0]

    # Control dimension: taken from lB when available, else from the caller's hint.
    has_input_matrix = "lB.weight" in sd and sd["lB.weight"].ndim == 2
    trained_u_dim = sd["lB.weight"].shape[1] if has_input_matrix else u_dim_hint

    if u_dim_hint != trained_u_dim:
        print(f"[Warn] u_dim mismatch (map={u_dim_hint}, ckpt={trained_u_dim}); using ckpt value for eval.")

    model = KoopmanNet(layers, trained_Nkoop, trained_u_dim).to(device)
    model.load_state_dict(sd, strict=True)

    return model, trained_u_dim, trained_Nkoop - state_dim, trained_Nkoop

def gmean(vals, eps: float = 1e-12) -> float:
    """Return the geometric mean of `vals`, flooring every value at `eps` before the log."""
    samples = np.asarray(list(vals), dtype=float)
    log_values = np.log(np.maximum(samples, eps))
    return float(np.exp(log_values.mean()))

# =========================
# 1) Load CSV log
# =========================
csv_log_path = os.path.join("..", "log", project_name, "koopman_results_log.csv")
assert os.path.exists(csv_log_path), f"CSV log not found: {csv_log_path}"
log_df = pd.read_csv(csv_log_path)

# types / back-compat: cast only the columns that this log actually has
_column_casts = {
    "use_covariance_loss": int,
    "use_control_loss": int,
    "env_name": str,
    "encode_dim": int,
    "seed": int,
    "model_path": str,
}
for column_name, column_type in _column_casts.items():
    if column_name not in log_df.columns:
        continue
    log_df[column_name] = log_df[column_name].astype(column_type)

assert "train_samples" in log_df.columns, \
    "Your log must include a 'train_samples' column for two-factor comparison."

# =========================
# 2) Infer state_dim per env & allowed dims (for FILTERING only)
# =========================
env_state_dim = {}          # env -> state_dim, or None when no dataset was found
dataset_found = {}          # env -> bool
encode_dims_allowed = {}    # env -> encode dims reported as allowed for that env

for env in envs:
    Ksteps_env = ksteps if env != "Polynomial" else 1

    # Try train sizes present in the CSV for this env first
    if "env_name" in log_df.columns:
        env_rows = log_df[log_df["env_name"] == env]
        csv_train_sizes = sorted(set(env_rows["train_samples"]))
    else:
        csv_train_sizes = None
    dataset_path = find_dataset_path(env, normalize, Ksteps_env, m, candidate_Ntrains=csv_train_sizes)

    if dataset_path is None:
        # Without a dataset the state width is unknown, so the configured
        # values are recorded as-is in either encode_dim_mode.
        env_state_dim[env] = None
        dataset_found[env] = False
        encode_dims_allowed[env] = list(encode_dims)
        continue

    data_dict = torch.load(dataset_path, weights_only=False)
    # The last axis of the test split holds control and state features together.
    state_dim = data_dict["Ktest_data"].shape[2] - u_dim_map.get(env, 0)
    env_state_dim[env] = state_dim
    dataset_found[env] = True
    encode_dims_allowed[env] = (
        [mult * state_dim for mult in encode_dims]
        if encode_dim_mode == "multiplier"
        else list(encode_dims)
    )

# =========================
# 3) Filter log rows with per-env train sizes & encode_dim_mode
# =========================
def _row_ok(r):
    """
    Decide whether one CSV log row belongs to the configured experiment grid.

    A row is kept only if its env, train size and seed are configured, its
    loss toggles (when those columns exist) are in `cov_regs` / `ctrl_regs`,
    its encode_dim_mode is compatible with the selected one, and its
    encode_dim passes the tolerant check described inline below.
    """
    env = r.get("env_name")
    if env not in envs:
        return False

    # per-env train sizes, then seed
    wanted_sizes = set(train_samples_per_env.get(env, default_train_samples))
    if r.get("train_samples") not in wanted_sizes or r.get("seed") not in set(seeds):
        return False

    # loss toggles (only checked when the column is present)
    toggles = (("use_covariance_loss", cov_regs), ("use_control_loss", ctrl_regs))
    for flag_column, wanted_flags in toggles:
        if flag_column in r.index and r.get(flag_column) not in set(wanted_flags):
            return False

    # encode_dim_mode compatibility
    if not _mode_matches(r.get("encode_dim_mode", None), encode_dim_mode):
        return False

    # encode_dim must be present and convertible to int
    raw_dim = r.get("encode_dim")
    if pd.isna(raw_dim):
        return False
    try:
        logged_dim = int(raw_dim)
    except Exception:
        return False

    as_multiplier = encode_dim_mode == "multiplier"
    known_state_dim = env_state_dim.get(env, None)

    if known_state_dim is None:
        # dataset missing: multipliers can still be checked, absolute sizes cannot
        return (logged_dim in set(encode_dims)) if as_multiplier else True

    # With a known state_dim, accept either representation of the configured dims.
    accepted = set(encode_dims)
    if as_multiplier:
        accepted.update(mult * known_state_dim for mult in encode_dims)
    return logged_dim in accepted

keep_mask = log_df.apply(_row_ok, axis=1)
filtered_df = log_df[keep_mask]

if filtered_df.empty:
    # Nothing matched: print what the log offers next to what was requested,
    # write an empty summary with the usual header, and stop the run.
    log_columns = set(log_df.columns)

    print("[Error] No models found after filtering.")
    print("Available envs in log:", sorted(log_df["env_name"].unique()))
    print("Per-env requested train_samples:", train_samples_per_env)
    if "train_samples" in log_columns:
        print("Available train_samples in log:", sorted(log_df["train_samples"].unique()))

    print("Allowed encode dims per env:")
    for env_name, allowed_dims in encode_dims_allowed.items():
        print(f"  {env_name}: {allowed_dims}")
    if "encode_dim_mode" in log_columns:
        print("Available encode_dim_mode:", sorted(log_df["encode_dim_mode"].astype(str).unique()))

    print("Available seeds:", sorted(log_df["seed"].unique()))
    for flag_column in ("use_covariance_loss", "use_control_loss"):
        if flag_column in log_columns:
            print(f"Available {flag_column}:", sorted(log_df[flag_column].unique()))

    found_by_env = {env_name: dataset_found.get(env_name, False) for env_name in envs}
    print("Datasets found by env:", found_by_env)
    if not all(found_by_env.values()):
        print("Note: When a dataset is missing, encode-dim filtering is relaxed to avoid false negatives.")

    os.makedirs(project_name, exist_ok=True)
    empty_summary = pd.DataFrame(columns=[
        "Environment", "TrainSamples", "EncodeDim", "UseCovLoss", "UseControlLoss",
        "WeightedError", "NormalizedCovLoss",
    ])
    empty_summary.to_csv(os.path.join(project_name, "evaluation_summary.csv"), index=False)
    sys.exit(0)

# =========================
# 4) Index checkpoints by (env, Ntrain, enc_dim, cov, ctrl)
# =========================
# (env, Ntrain, logged encode_dim, cov flag, ctrl flag) -> [(seed, checkpoint path), ...]
results = {}
for _, row in filtered_df.iterrows():
    group_key = (
        row["env_name"],
        int(row["train_samples"]),
        int(row["encode_dim"]),
        row.get("use_covariance_loss", 0),
        row.get("use_control_loss", 0),
    )
    seed_and_path = (int(row["seed"]), row["model_path"])
    if group_key in results:
        results[group_key].append(seed_and_path)
    else:
        results[group_key] = [seed_and_path]

# Side table stored under a string key: group key -> encode dim of the loaded model (for labels)
results["_actual_z_map"] = {}

# =========================
# 5) Evaluate & aggregate (geom. mean across seeds)
# =========================
# (env, Ntrain, logged encode_dim, cov flag, ctrl flag) -> seed-aggregated metrics
agg_metrics = {}

for env in envs:
    Ksteps_env = 1 if env == "Polynomial" else ksteps

    # G1 and Go2 are scored with a slower discount. This is kept in a local so
    # the override cannot leak into environments evaluated later in the loop
    # (assigning to the global `gamma` here would change every subsequent env).
    gamma_env = 0.99 if env in ("G1", "Go2") else gamma

    # Prefer train sizes present in CSV for this env when locating dataset
    csv_train_sizes = sorted(set(filtered_df[filtered_df["env_name"] == env]["train_samples"])) \
                      if "env_name" in filtered_df.columns else None
    dataset_path = find_dataset_path(env, normalize, Ksteps_env, m, candidate_Ntrains=csv_train_sizes)
    if dataset_path is None:
        print(f"[Warning] Missing dataset for {env}, skipping.")
        continue

    # NOTE(review): weights_only=False unpickles arbitrary objects; only use
    # this on dataset/checkpoint files that come from a trusted source.
    data_dict = torch.load(dataset_path, weights_only=False)
    test_data = torch.from_numpy(data_dict["Ktest_data"]).float().to(device)

    # state width = total feature width minus the mapped control width
    u_dim_hint = u_dim_map.get(env, None)
    state_dim  = test_data.shape[2] - (u_dim_hint or 0)

    for key, checkpoints in list(results.items()):
        if key == "_actual_z_map":
            continue
        if key[0] != env:
            continue

        w_errs, cov_losses = [], []
        actual_z_recorded = False

        for seed_val, path in checkpoints:
            if not os.path.exists(path):
                print(f"[Warning] Missing checkpoint: {path}")
                continue

            chkpt = torch.load(path, map_location=device, weights_only=False)
            try:
                model, u_dim_eval, enc_dim_actual, trained_Nkoop = build_model_from_checkpoint(
                    chkpt, state_dim, u_dim_hint, device
                )
            except Exception as e:
                # One unreadable checkpoint should not abort the whole sweep.
                print(f"[Error] Failed to build/load model from {path}: {e}")
                continue

            # Record actual z once for this key (for labels/tables)
            if not actual_z_recorded:
                results["_actual_z_map"][key] = enc_dim_actual
                actual_z_recorded = True

            w_err, c_loss = evaluate_model(model, test_data, u_dim_eval, gamma_env, state_dim, device)
            w_errs.append(w_err)
            cov_losses.append(c_loss)

        if not w_errs:
            continue

        # Geom. mean across seeds
        agg_metrics[key] = {
            "WeightedError_mean":     gmean(w_errs),
            "NormalizedCovLoss_mean": gmean(np.array(cov_losses) + 1e-12),
        }

# =========================
# 6) Save summary CSV
# =========================
actual_z_by_key = results["_actual_z_map"]
rows = [
    {
        "Environment":       env_name,
        "TrainSamples":      n_train,
        # report the encode dim of the loaded model, falling back to the logged value
        "EncodeDim":         actual_z_by_key.get(
            (env_name, n_train, logged_dim, cov_flag, ctrl_flag), logged_dim
        ),
        "UseCovLoss":        cov_flag,
        "UseControlLoss":    ctrl_flag,
        "WeightedError":     metrics["WeightedError_mean"],
        "NormalizedCovLoss": metrics["NormalizedCovLoss_mean"],
    }
    for (env_name, n_train, logged_dim, cov_flag, ctrl_flag), metrics in agg_metrics.items()
]

df_summary = pd.DataFrame(rows)
os.makedirs(project_name, exist_ok=True)
summary_path = os.path.join(project_name, "evaluation_summary.csv")
df_summary.to_csv(summary_path, index=False)
print(f"Saved summary to {summary_path}")
if not df_summary.empty:
    display(df_summary)

# ======================================================
# 7) NEW PLOTS (loss filter depends on whether env has control)
#    A) TrainSamples vs averaged error (geom. mean across EncodeDim)
#    B) EncodeDim    vs averaged error (geom. mean across TrainSamples)
# ======================================================
def _save_error_curve(frame, column, marker, xlabel, env_name, suffix, target_dir):
    """
    Plot the geometric-mean WeightedError of `frame` grouped by `column`.

    The figure is written to ``<target_dir>/<env_name>_<column>_vs_Error.png``;
    if grouping yields no rows a skip message is printed instead.
    """
    curve = (
        frame.groupby(column, as_index=False)
             .agg(WeightedError_gmean=("WeightedError", lambda s: gmean(s.values)))
             .sort_values(column)
    )
    if curve.empty:
        print(f"[Skip] No data for {column} plot in {env_name}.")
        return

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(curve[column], curve["WeightedError_gmean"], marker=marker)
    ax.set_xscale("linear")
    ax.set_yscale("log")
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Weighted Prediction Error (MSE, geom. mean)")
    ax.set_title(f"{env_name} — {column} vs Error {suffix}")
    ax.grid(True, which="both", ls="--", alpha=0.6)
    fig.tight_layout()
    out_path = os.path.join(target_dir, f"{env_name}_{column}_vs_Error.png")
    fig.savefig(out_path, dpi=300)
    plt.close(fig)
    print(f"Saved: {out_path}")


if df_summary.empty:
    print("[Skip] No rows in df_summary; nothing to plot.")
else:
    for env in envs:
        # Rows always need cov=1; ctrl=1 is required only for envs with a control input.
        row_mask = (df_summary["Environment"] == env) & (df_summary["UseCovLoss"] == 1)
        if env_has_control(env) and ("UseControlLoss" in df_summary.columns):
            row_mask = row_mask & (df_summary["UseControlLoss"] == 1)
            title_suffix = "(cov=1, ctrl=1)"
        else:
            # e.g., Polynomial: no control loss, so only covariance loss is required
            title_suffix = "(cov=1; no control)"
        sub = df_summary[row_mask].copy()

        if sub.empty:
            print(f"[Skip] No eligible rows for env {env} with filter {title_suffix}.")
            continue

        out_dir = os.path.join(project_name, env)
        os.makedirs(out_dir, exist_ok=True)

        # Plot A: error vs TrainSamples (geom. mean across EncodeDim)
        _save_error_curve(sub, "TrainSamples", "o", "Train Samples (Ktrain)",
                          env, title_suffix, out_dir)
        # Plot B: error vs EncodeDim (geom. mean across TrainSamples)
        _save_error_curve(sub, "EncodeDim", "s", "Encode Dimension (z)",
                          env, title_suffix, out_dir)

Saved summary to Aug_8/evaluation_summary.csv


Unnamed: 0,Environment,TrainSamples,EncodeDim,UseCovLoss,UseControlLoss,WeightedError,NormalizedCovLoss
0,Franka,60000,17,0,0,3.453208e-06,4.605299e-05
1,Franka,60000,17,1,0,3.664425e-06,1.444889e-06
2,Franka,60000,17,0,1,3.969726e-06,0.0001742244
3,Franka,60000,17,1,1,4.469529e-06,1.128509e-06
4,Franka,60000,34,0,0,1.175875e-07,4.222307e-05
5,Franka,60000,34,1,0,1.065051e-07,9.452341e-07
6,Franka,60000,34,0,1,6.617872e-08,4.989505e-05
7,Franka,60000,34,1,1,1.028723e-07,8.238921e-07
8,Franka,60000,68,0,0,2.263802e-08,2.005329e-05
9,Franka,60000,68,1,0,2.486477e-08,3.350522e-07


Saved: Aug_8/Franka/Franka_TrainSamples_vs_Error.png
Saved: Aug_8/Franka/Franka_EncodeDim_vs_Error.png
