In [3]:
import os
import sys
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from IPython.display import display
from scipy.optimize import curve_fit

# ============================
# Configuration
# ============================
# Name of the log sub-directory to read and of the output directory to write.
PROJECT_NAME = "Sep_21"

# Experiment grid: only log rows matching these values are analysed.
ENVS = [
    "DampingPendulum",
    "Franka",
    "DoublePendulum",
    "Polynomial",
    "Kinova",
    "G1",
    "Go2",
]
SEEDS = [
    17382,
    76849,
    20965,
    84902,
    51194,
]
TRAIN_SAMPLES = [
    1000,
    4000,
    16000,
    64000,
    140000,
]
# Value of the `m` column / dataset filename field used for "Polynomial".
M_POLY = 100

# Control dimension per environment; it is subtracted from the dataset's
# last-axis size to obtain the state dimension. Environments not listed here
# are treated as having no control input.
U_DIM = {
    "Franka": 7,
    "DoublePendulum": 2,
    "DampingPendulum": 1,
    "G1": 23,
    "Go2": 12,
    "Kinova": 7,
}
# Normalisation tag used in dataset filenames; unlisted environments use "nonorm".
NORMALIZE = {
    "G1": "norm",
    "Go2": "norm",
}
# Allowed values of `encode_dim_param` (powers of two: 1, 2, 4, 8, 16).
REL_MULT_TARGETS = [2 ** exponent for exponent in range(5)]

# ============================
# Helpers
# ============================
def gmean(vals, eps: float = 1e-12) -> float:
    """Geometric mean of ``vals`` with every entry floored at ``eps``.

    Args:
        vals: Iterable of numbers; it is materialised with ``list`` first.
        eps: Element-wise lower bound applied before the logarithm, so zero or
            negative entries contribute ``log(eps)`` instead of ``-inf``/``nan``.

    Returns:
        ``exp(mean(log(max(vals, eps))))`` as a Python ``float``.
    """
    floored = np.maximum(np.asarray(list(vals), dtype=float), eps)
    mean_log = np.log(floored).mean()
    return float(np.exp(mean_log))

def env_has_control(env: str) -> bool:
    """Tell whether ``U_DIM`` lists a positive control dimension for ``env``.

    Environments missing from ``U_DIM`` count as dimension 0, i.e. no control.
    """
    control_dim = U_DIM.get(env, 0)
    return control_dim > 0

def find_dataset_path(env: str, m_val: int, ksteps: int = 15) -> str | None:
    """Build the expected dataset file path for ``env`` and return it if present.

    Args:
        env: Environment name. It selects the normalisation tag from
            ``NORMALIZE`` (``"nonorm"`` when absent) and the filename layout.
        m_val: Value written into the ``m_`` filename field; only used when
            ``env`` is ``"Polynomial"``.
        ksteps: Value written into the ``Ksteps_`` field for every environment
            except ``"Polynomial"``, whose filename always ends in ``Ksteps_1``.

    Returns:
        The path under ``../data/datasets`` when ``os.path.exists`` confirms it,
        otherwise ``None``.
    """
    norm_tag = NORMALIZE.get(env, "nonorm")
    datasets_dir = os.path.join("..", "data", "datasets")

    is_polynomial = env == "Polynomial"
    filename = (
        f"dataset_{env}_{norm_tag}_m_{m_val}_Ktrain_140000_Kval_20000_Ktest_20000_Ksteps_1.pt"
        if is_polynomial
        else f"dataset_{env}_{norm_tag}_Ktrain_140000_Kval_20000_Ktest_20000_Ksteps_{ksteps}.pt"
    )
    candidate = os.path.join(datasets_dir, filename)

    if not os.path.exists(candidate):
        return None
    return candidate

# ============================
# 1) Load and Validate CSV Log
# ============================
# Locate and read the experiment log written under ../log/<PROJECT_NAME>/.
log_csv = os.path.join("..", "log", PROJECT_NAME, "koopman_results_log.csv")
assert os.path.exists(log_csv), f"‚ùå CSV log not found: {log_csv}"
log = pd.read_csv(log_csv)

# Validate every column this analysis reads, so a malformed log fails here with
# a clear message rather than with a bare KeyError part-way through:
#   * "m" is read unconditionally by the Polynomial filter on the log;
#   * "use_covariance_loss" / "use_control_loss" are group-by keys of the
#     summary table and are used to select runs for the plots and fits.
required_cols = [
    "env_name", "seed", "train_samples", "encode_dim",
    "test_Kloss", "test_CovLoss", "encode_dim_param",
    "m", "use_covariance_loss", "use_control_loss",
]
for col in required_cols:
    assert col in log.columns, f"‚ùå Log file is missing required column: '{col}'"

# ============================
# 2) Infer State Dimension for Each Environment
# ============================
# Maps each environment to its state dimension, or to None when its dataset
# file could not be found on disk.
env_state_dim: dict[str, int | None] = {}
for env in ENVS:
    ds_path = find_dataset_path(env, M_POLY)
    if ds_path is not None:
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects — only point this at dataset files from a trusted source.
        data = torch.load(ds_path, weights_only=False)
        # assumes data["Ktest_data"] has at least 3 axes and that axis 2 holds
        # the concatenated control + state features — TODO confirm
        full_dim = int(data["Ktest_data"].shape[2])
        u = U_DIM.get(env, 0)
        # State dimension = size of axis 2 minus the control dimension.
        env_state_dim[env] = full_dim - u
    else:
        env_state_dim[env] = None

# ============================
# 3) Filter Log for Relevant Experiments
# ============================
# Rows must belong to the configured experiment grid ...
in_configured_grid = (
    log['env_name'].isin(ENVS)
    & log['seed'].isin(SEEDS)
    & log['train_samples'].isin(TRAIN_SAMPLES)
    & log['encode_dim_param'].isin(REL_MULT_TARGETS)
)
# ... and Polynomial rows are additionally restricted to m == M_POLY.
polynomial_m_matches = (log['env_name'] != 'Polynomial') | (log['m'] == M_POLY)

mask = in_configured_grid & polynomial_m_matches
filtered_log = log[mask].copy()

# Nothing to analyse: report it and stop executing the cell.
if filtered_log.empty:
    print("‚ùå No matching experiments found in the log after filtering. Please check your configuration.")
    raise SystemExit(0)

# ============================
# 4) Aggregate Results for Summary Table (df_agg)
# ============================
def normalize_cov(row):
    """Scale a row's ``test_CovLoss`` by ``encode_dim * (encode_dim - 1)``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the keys
            ``'encode_dim'`` and ``'test_CovLoss'``.

    Returns:
        ``test_CovLoss / (z * (z - 1))`` where ``z`` is ``encode_dim``; the raw
        ``test_CovLoss`` is returned unchanged when ``z <= 1``.
    """
    latent_dim = row['encode_dim']
    if latent_dim > 1:
        pair_count = latent_dim * (latent_dim - 1)
        return row['test_CovLoss'] / pair_count
    if latent_dim <= 1:
        return row['test_CovLoss']
    # Reached only when both comparisons are False (e.g. NaN encode_dim):
    # fall through to the same division the general case performs.
    return row['test_CovLoss'] / (latent_dim * (latent_dim - 1))

# Per-run covariance loss scaled by encode_dim * (encode_dim - 1).
filtered_log['NormalizedCovariance'] = filtered_log.apply(normalize_cov, axis=1)

# One summary row per (environment, sample size, encode dim, loss flags);
# rows sharing these keys are combined with the geometric mean.
grouping_keys = [
    "env_name",
    "train_samples",
    "encode_dim",
    "use_covariance_loss",
    "use_control_loss",
]
df_agg = (
    filtered_log
    .groupby(grouping_keys)
    .agg(
        PredictionError=("test_Kloss", gmean),
        NormalizedCovariance=("NormalizedCovariance", gmean),
    )
    .reset_index()
)

# Presentation-friendly column names for the summary table.
summary_column_names = {
    "env_name": "Environment",
    "train_samples": "TrainSamples",
    "encode_dim": "EncodeDim",
    "use_covariance_loss": "UseCovLoss",
    "use_control_loss": "UseControlLoss",
}
df = df_agg.rename(columns=summary_column_names)

# ============================
# 5) Save Summary CSV
# ============================
os.makedirs(PROJECT_NAME, exist_ok=True)
out_csv = os.path.join(PROJECT_NAME, "evaluation_summary.csv")
df.to_csv(out_csv, index=False)
print(f"‚úÖ Saved summary to {out_csv}")
if not df.empty:
    display(df)

# ============================
# 6) Per-Environment Plots
# ============================
def _save_error_curve(curve, x_col, marker, title, xlabel, out_path, xscale=None):
    """Plot ``curve["Error_gmean"]`` against ``curve[x_col]`` (log y) and save a PNG.

    Args:
        curve: DataFrame with the columns ``x_col`` and ``"Error_gmean"``.
        x_col: Name of the column used for the x axis.
        marker: Matplotlib marker style for the data points.
        title: Figure title.
        xlabel: Label of the x axis.
        out_path: Destination file; written at 300 dpi.
        xscale: Optional x-axis scale passed to ``ax.set``; the axis scale is
            left untouched when ``None``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(curve[x_col], curve["Error_gmean"], marker=marker)
    axis_settings = dict(yscale="log", xlabel=xlabel, ylabel="Prediction Error (MSE, geom. mean)", title=title)
    if xscale is not None:
        axis_settings["xscale"] = xscale
    ax.set(**axis_settings)
    ax.grid(True, which="both", ls="--", alpha=0.6)
    fig.tight_layout()
    # Save through the figure object rather than pyplot's "current figure" state.
    fig.savefig(out_path, dpi=300)
    plt.close(fig)
    print(f"üñºÔ∏è Saved plot: {out_path}")


if not df.empty:
    for env in ENVS:
        # Runs with the covariance loss enabled; for environments that have a
        # control input, additionally require the control loss to be enabled.
        sub = df[(df.Environment == env) & (df.UseCovLoss == 1)]
        if env_has_control(env) and "UseControlLoss" in df.columns:
            sub = sub[sub.UseControlLoss == 1]
        if sub.empty:
            continue
        tag = "(cov=1, ctrl=1)" if env_has_control(env) else "(cov=1)"

        out_dir = os.path.join(PROJECT_NAME, env)
        os.makedirs(out_dir, exist_ok=True)

        # Plot A: error vs. training-set size, at the largest encode dimension.
        max_encode_dim = sub['EncodeDim'].max()
        sub_for_ts_plot = sub[sub['EncodeDim'] == max_encode_dim]
        gA = (
            sub_for_ts_plot
            .groupby("TrainSamples", as_index=False)
            .agg(Error_gmean=("PredictionError", gmean))
            .sort_values("TrainSamples")
        )
        if not gA.empty:
            _save_error_curve(
                gA,
                x_col="TrainSamples",
                marker="o",
                title=f"{env} ‚Äî TrainSamples vs Error\n(at largest EncodeDim: {max_encode_dim}) {tag}",
                xlabel="Train Samples (Ktrain)",
                out_path=os.path.join(out_dir, f"{env}_TrainSamples_vs_Error_LargestEncDim.png"),
            )

        # Plot B: error vs. encode dimension, at the largest training-set size.
        max_train_samples = sub['TrainSamples'].max()
        sub_for_ed_plot = sub[sub['TrainSamples'] == max_train_samples]
        gB = (
            sub_for_ed_plot
            .groupby("EncodeDim", as_index=False)
            .agg(Error_gmean=("PredictionError", gmean))
            .sort_values("EncodeDim")
        )
        if not gB.empty:
            _save_error_curve(
                gB,
                x_col="EncodeDim",
                marker="s",
                title=f"{env} ‚Äî EncodeDim vs Error\n(at largest TrainSamples: {max_train_samples}) {tag}",
                xlabel="Encode Dimension (z)",
                out_path=os.path.join(out_dir, f"{env}_EncodeDim_vs_Error_LargestTrainSamples.png"),
                xscale="linear",
            )


# ============================
# 7) Combined Plot: Normalized Error vs. Relative Multiplier
# ============================
# Only the runs at each environment's largest training-sample size are used in this section.
if not df.empty:
    def nearest_rel_mult(env: str, z_abs: float) -> float:
        """Snap ``z_abs / state_dim`` to the closest entry of ``REL_MULT_TARGETS``.

        Args:
            env: Environment name, used to look up its state dimension in
                ``env_state_dim``.
            z_abs: Absolute encode dimension.

        Returns:
            The closest target multiplier as a float, or NaN when the state
            dimension of ``env`` is missing, ``None``, zero or negative.
        """
        st = env_state_dim.get(env, None)
        if not st or st <= 0:
            return float("nan")
        r = float(z_abs) / float(st)
        return float(min(REL_MULT_TARGETS, key=lambda m: abs(m - r)))

    rows = []
    for env in ENVS:
        # Same run selection as the per-environment plots: covariance loss on,
        # plus control loss on for environments that have a control input.
        sub = df[(df.Environment == env) & (df.UseCovLoss == 1)]
        if env_has_control(env) and "UseControlLoss" in df.columns:
            sub = sub[sub.UseControlLoss == 1]
        if sub.empty:
            continue

        # Keep only the runs trained on the largest sample size for this environment.
        max_train_samples = sub['TrainSamples'].max()
        sub = sub[sub['TrainSamples'] == max_train_samples]

        sub = sub.copy()
        sub["RelMult"] = sub["EncodeDim"].apply(lambda z: nearest_rel_mult(env, z))
        # Rows whose multiplier could not be determined (NaN) are discarded.
        sub = sub.replace([np.inf, -np.inf], np.nan).dropna(subset=["RelMult"])

        g = (
            sub
            .groupby("RelMult", as_index=False)
            .agg(PredictionError_gmean=("PredictionError", gmean))
            .sort_values("RelMult")
        )
        if g.empty:
            continue

        # Normalise by the error at the smallest multiplier (floored to avoid /0).
        E0 = g.loc[g["RelMult"].idxmin(), "PredictionError_gmean"]
        g["RelError"] = g["PredictionError_gmean"] / max(E0, 1e-12)

        # Slope of log(RelError) against log(RelMult); needs at least two points.
        if g.shape[0] >= 2:
            x = np.log(g["RelMult"].to_numpy(dtype=float))
            y = np.log(np.maximum(g["RelError"].to_numpy(dtype=float), 1e-12))
            b1, _ = np.polyfit(x, y, 1)
            g["Slope"] = float(b1)
        else:
            g["Slope"] = float("nan")

        # "NoiseRel": mean relative error over the (up to) two largest multipliers.
        k = min(2, len(g))
        g["NoiseRel"] = float(g.tail(k)["RelError"].mean())
        rows.append(g.assign(Environment=env))

    if rows:
        GG = pd.concat(rows, ignore_index=True)
        fig, ax = plt.subplots(figsize=(8, 6))
        for env in sorted(GG["Environment"].unique()):
            ge = GG[GG.Environment == env].sort_values("RelMult")
            (line,) = ax.plot(ge["RelMult"], ge["RelError"], marker="o", label=f"{env} (slope={ge['Slope'].iloc[0]:.2f})")
            # Dashed horizontal line at NoiseRel, in the same colour as the curve.
            ax.hlines(ge["NoiseRel"].iloc[0], ge["RelMult"].min(), ge["RelMult"].max(), linestyles="dashed", alpha=0.4, colors=[line.get_color()])
        try:
            ax.set_xscale("log", base=2)
        except (TypeError, ValueError):
            # Matplotlib releases that predate the `base` keyword reject it with
            # ValueError rather than TypeError, so both must be caught for the
            # legacy `basex` spelling to be reached.
            ax.set_xscale("log", basex=2)

        title = "Normalized Error vs. Relative Multiplier (Largest TrainSamples)"
        ax.set(xlabel="Relative encode multiplier (z / state_dim, log‚ÇÇ)", ylabel="Relative prediction error (E / E@min multiplier)", title=title)
        ax.grid(True, which="both", ls="--", alpha=0.6)
        ax.legend(ncol=2, fontsize=9)
        fig.tight_layout()
        p = os.path.join(PROJECT_NAME, "AllEnvs_RelError_vs_RelMultiplier_LargestTrainSamples.png")
        # Save through the figure object rather than pyplot's "current figure" state.
        fig.savefig(p, dpi=300)
        plt.close(fig)
        print(f"üñºÔ∏è Saved combined plot: {p}")

# ============================
# 8) Scaling-Law Fits per Environment
# ============================
if not filtered_log.empty:
    def scaling_model(D, A, alpha, C):
        """Power law with an additive floor: ``A * D**(-alpha) + C``."""
        return A * np.power(D, -alpha) + C

    fit_results = []
    for env in ENVS:
        # Per-seed rows (not the aggregated table): covariance loss on, plus
        # control loss on for environments that have a control input.
        sub = filtered_log[(filtered_log.env_name == env) & (filtered_log.use_covariance_loss == 1)]
        if env_has_control(env) and "use_control_loss" in filtered_log.columns:
            sub = sub[sub.use_control_loss == 1]

        if sub.empty:
            continue

        # Fit only on the runs trained with the largest sample size.
        max_train_samples = sub['train_samples'].max()
        sub = sub[sub['train_samples'] == max_train_samples]

        # The model has three free parameters, so require three distinct x values.
        if sub['encode_dim'].nunique() < 3:
            print(f"‚ö†Ô∏è Skipping scaling-law for {env}: not enough unique encode_dim points at largest sample size ({max_train_samples}).")
            continue

        D = sub["encode_dim"].astype(float).to_numpy()
        E = sub["test_Kloss"].astype(float).to_numpy()

        # Best-effort per environment: any failure while fitting or plotting is
        # reported and the loop moves on to the next environment.
        try:
            # Bounded least squares: A >= 0, 0 <= alpha <= 4, C >= 0.
            popt, pcov = curve_fit(scaling_model, D, E, p0=[E.max(), 0.7, E.min()],
                                   bounds=([0.0, 0.0, 0.0], [np.inf, 4.0, np.inf]), maxfev=20000)
            A_hat, alpha_hat, C_hat = popt
            E_pred = scaling_model(D, A_hat, alpha_hat, C_hat)
            # Coefficient of determination computed on the untransformed errors.
            ss_res = np.sum((E - E_pred) ** 2)
            ss_tot = np.sum((E - np.mean(E)) ** 2)
            R2_lin = 1.0 - ss_res / ss_tot if ss_tot > 0 else float("nan")

            fit_results.append({"Environment": env, "A": A_hat, "alpha": alpha_hat, "C": C_hat, "R2_linear": R2_lin})

            out_dir = os.path.join(PROJECT_NAME, env)
            os.makedirs(out_dir, exist_ok=True)
            fig, ax = plt.subplots(figsize=(7, 5))

            ax.plot(D, E, "o", label="Data (all seeds)", alpha=0.5)

            # Evaluate the fitted curve on a geometric grid across the data range.
            D_fit = np.geomspace(D.min(), D.max(), 256)
            E_fit = scaling_model(D_fit, A_hat, alpha_hat, C_hat)
            ax.plot(D_fit, E_fit, "-", color='red', linewidth=2, label=f"Fit: A={A_hat:.3g}, Œ±={alpha_hat:.2f}, C={C_hat:.3g} (R¬≤={R2_lin:.2f})")

            try:
                ax.set_xscale("log", base=2)
            except (TypeError, ValueError):
                # Matplotlib releases that predate the `base` keyword reject it
                # with ValueError rather than TypeError, so both must be caught
                # for the legacy `basex` spelling to be reached.
                ax.set_xscale("log", basex=2)

            title = f"{env} ‚Äî Scaling Law Fit (Largest TrainSamples: {max_train_samples})"
            ax.set(yscale="log", xlabel="Encode Dimension (z)", ylabel="Prediction Error (MSE)", title=title)
            ax.grid(True, which="both", ls="--", alpha=0.6)
            ax.legend(fontsize=9)
            fig.tight_layout()
            p = os.path.join(out_dir, f"{env}_ScalingLawFit_LargestTrainSamples.png")
            # Save through the figure object rather than pyplot's "current figure" state.
            fig.savefig(p, dpi=300)
            plt.close(fig)
            print(f"üñºÔ∏è Saved scaling-law plot: {p}")

        except Exception as ex:
            print(f"‚ö†Ô∏è Could not fit scaling law for {env}: {ex}")

    if fit_results:
        df_fits = pd.DataFrame(fit_results)
        p = os.path.join(PROJECT_NAME, "scaling_law_fits_largest_train_samples.csv")
        df_fits.to_csv(p, index=False)
        print(f"‚úÖ Saved scaling-law fit table: {p}")
        display(df_fits)

‚úÖ Saved summary to Sep_21/evaluation_summary.csv


Unnamed: 0,Environment,TrainSamples,EncodeDim,UseCovLoss,UseControlLoss,PredictionError,NormalizedCovariance
0,DampingPendulum,1000,2,False,False,0.014942,6.611577e+05
1,DampingPendulum,1000,2,False,True,0.097864,2.747380e+03
2,DampingPendulum,1000,2,True,False,0.020307,2.094539e-07
3,DampingPendulum,1000,2,True,True,0.150161,1.223969e-04
4,DampingPendulum,1000,4,False,False,0.022343,1.270932e+04
...,...,...,...,...,...,...,...
645,Polynomial,140000,12,True,False,0.000631,9.368436e-08
646,Polynomial,140000,24,False,False,0.000469,4.363544e+00
647,Polynomial,140000,24,True,False,0.000462,1.460223e-03
648,Polynomial,140000,48,False,False,0.000444,2.988363e+00


üñºÔ∏è Saved plot: Sep_21/DampingPendulum/DampingPendulum_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot: Sep_21/DampingPendulum/DampingPendulum_EncodeDim_vs_Error_LargestTrainSamples.png
üñºÔ∏è Saved plot: Sep_21/Franka/Franka_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot: Sep_21/Franka/Franka_EncodeDim_vs_Error_LargestTrainSamples.png
üñºÔ∏è Saved plot: Sep_21/DoublePendulum/DoublePendulum_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot: Sep_21/DoublePendulum/DoublePendulum_EncodeDim_vs_Error_LargestTrainSamples.png
üñºÔ∏è Saved plot: Sep_21/Polynomial/Polynomial_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot: Sep_21/Polynomial/Polynomial_EncodeDim_vs_Error_LargestTrainSamples.png
üñºÔ∏è Saved plot: Sep_21/Kinova/Kinova_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot: Sep_21/Kinova/Kinova_EncodeDim_vs_Error_LargestTrainSamples.png
üñºÔ∏è Saved plot: Sep_21/G1/G1_TrainSamples_vs_Error_LargestEncDim.png
üñºÔ∏è Saved plot

Unnamed: 0,Environment,A,alpha,C,R2_linear
0,DampingPendulum,0.912926,2.79333,0.008849887,0.778184
1,Franka,0.000254,1.57769,9.740425e-08,0.83099
2,DoublePendulum,0.011816,2.170697,0.0167013,0.611817
3,Polynomial,0.00047,0.253914,0.0002753584,0.330634
4,Kinova,3.516526,0.882522,1.2756189999999998e-19,0.967441
5,G1,0.157902,0.491844,0.1283856,0.998491
6,Go2,0.319514,0.747842,0.2051753,0.999225


In [29]:
# Arithmetic mean of the summary metrics per training-set size, over every
# DampingPendulum row of the summary table.
df.loc[df['Environment'] == 'DampingPendulum', ['TrainSamples', 'PredictionError', 'NormalizedCovariance']].groupby("TrainSamples").mean()

Unnamed: 0_level_0,PredictionError,NormalizedCovariance
TrainSamples,Unnamed: 1_level_1,Unnamed: 2_level_1
1000,0.031757,57606.62663
4000,0.022341,29880.345943
16000,0.020648,29513.165659
64000,0.021046,31029.251127
140000,0.01974,3989.715538
