In [5]:
# Imports and setup
import os
import glob
import re
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch import nn
import sys

# Add parent directories to sys.path so your modules can be imported
sys.path.append("../scripts")
sys.path.append("../utility")

# Import your custom modules
from network import KoopmanNet
from dataset import KoopmanDatasetCollector

# Set parameters here instead of using argparse
project_name = "Koopman_Results_Apr_8_2"   # Project name used during training
gamma = 0.8             # Gamma discount factor (e.g., 0.8 as in training)

# Create the main folder for figures if it doesn't exist
if not os.path.exists(project_name):
    os.makedirs(project_name)

In [6]:
def evaluate_model(model, data, u_dim, gamma, state_dim, device):
    """
    Given a model and test data, compute:
      - The weighted (gamma-decayed) multi-step prediction error (aggregated MSE),
      - The unweighted error at each prediction step (for plotting loss curves),
      - The normalized covariance loss on the learned encoding.
    """
    model.eval()
    with torch.no_grad():
        steps = data.shape[0]
        # Initialize the encoding from the first time step:
        if u_dim is None:
            X = model.encode(data[0].to(device))
        else:
            # For systems with control inputs, the network encodes the state portion only.
            X = model.encode(data[0, :, u_dim:].to(device))
            
        # Save initial encoding for covariance evaluation
        encoded_initial = X[:, state_dim:]
        
        weighted_loss = 0.0
        beta = 1.0
        beta_sum = 0.0
        step_errors = []
        # Iterate through prediction steps to compute error values
        for i in range(steps - 1):
            if u_dim is None:
                X = model.forward(X, None)
                target = data[i+1].to(device)
            else:
                X = model.forward(X, data[i, :, :u_dim].to(device))
                target = data[i+1, :, u_dim:].to(device)
            error = nn.MSELoss()(X[:, :state_dim], target)
            step_errors.append(error.item())
            weighted_loss += beta * error
            beta_sum += beta
            beta *= gamma
        weighted_loss /= beta_sum

        # Compute covariance loss on the encoded representation:
        z = encoded_initial  # shape: (num_trajectories, encode_dim)
        z_mean = torch.mean(z, dim=0, keepdim=True)
        z_centered = z - z_mean
        cov_matrix = (z_centered.t() @ z_centered) / (z_centered.size(0) - 1)
        diag_cov = torch.diag(torch.diag(cov_matrix))
        off_diag = cov_matrix - diag_cov
        cov_loss_val = torch.norm(off_diag, p='fro')**2
        encode_dim = X.shape[1] - state_dim
        normalized_cov_loss = (cov_loss_val.item() / (encode_dim * (encode_dim - 1))
                               if encode_dim > 1 else cov_loss_val.item())
    return weighted_loss.item(), step_errors, normalized_cov_loss


In [11]:
# Main evaluation

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define experimental parameters
envs = ['LogisticMap', 'DampingPendulum', 'Franka', 'DoublePendulum', 'Polynomial', 'G1', 'Go2']
encode_dims = [4, 16, 64, 256, 1024]
cov_regs = [0, 1]
seeds = [1, 2, 3, 4, 5]

# Directory containing best-model checkpoints (assumes training saved them in this naming format)
best_models_dir = os.path.join("..", "log", "best_models", project_name)
results = {}  # dictionary keyed by (env, encode_dim, cov_reg)

# The checkpoint file naming convention: 
# "best_model_norm_{env}_{encode_dim}_{cov_reg}_{seed}.pth"
pattern = r"best_model_norm_(\w+)_(\d+)_(\d+)_(\d+)\.pth"
all_files = glob.glob(os.path.join(best_models_dir, "best_model_norm_*.pth"))
for f in all_files:
    basename = os.path.basename(f)
    m = re.match(pattern, basename)
    if m:
        env_name = m.group(1)
        enc_dim = int(m.group(2))
        cov_reg_val = int(m.group(3))
        seed_val = int(m.group(4))
        # Skip the configuration (encode_dim==1 and cov_reg==1) as in training
        if enc_dim == 1 and cov_reg_val == 1:
            continue
        if enc_dim not in encode_dims:
            continue
        key = (env_name, enc_dim, cov_reg_val)
        if key not in results:
            results[key] = []
        results[key].append((seed_val, f))
        
# Prepare storage for summary table and step error curves (for curve plots)
summary = []        # List of dicts for table rows
step_error_curves = {}  # Key: (env, encode_dim, cov_reg) -> averaged step error list

# Loop over environments and evaluate configurations
for env in envs:
    # Set Ksteps (prediction horizon): for "Polynomial" and "LogisticMap" use horizon 1, otherwise 10.
    Ksteps = 1 if env in ["Polynomial", "LogisticMap"] else 10
    
    # Load the test dataset (assumes same naming convention as in train.py)
    norm_str = "norm"  # because normalize=True was used during training
    dataset_path = os.path.join("..", "data", "datasets", 
                                f"dataset_{env}_{norm_str}_Ktrain_60000_Kval_20000_Ktest_20000_Ksteps_{Ksteps}.pt")
    if not os.path.exists(dataset_path):
        print(f"Dataset file {dataset_path} not found for environment {env}, skipping.")
        continue
    data_dict = torch.load(dataset_path, weights_only=False)
    # Convert test data to tensor if necessary
    test_data = torch.from_numpy(data_dict["Ktest_data"]).float().to(device)

    # Determine state and control dimensions:
    if env in ["Franka", "DoublePendulum", "DampingPendulum", "G1", "Go2"]:
        if env == "Franka":
            u_dim = 7
            state_dim = test_data.shape[2] - u_dim
        elif env == "DoublePendulum":
            u_dim = 2
            state_dim = test_data.shape[2] - u_dim
        elif env == "DampingPendulum":
            u_dim = 1
            state_dim = test_data.shape[2] - u_dim
        elif env == "G1":
            u_dim = 37
            state_dim = test_data.shape[2] - u_dim
        elif env == "Go2":
            u_dim = 12
            state_dim = test_data.shape[2] - u_dim
        else:
            u_dim = None
            state_dim = test_data.shape[2]
    else:
        u_dim = None
        state_dim = test_data.shape[2]

    # Loop over each configuration for the current environment:
    for key in results:
        if key[0] != env:
            continue
        enc_dim, cov_reg_val = key[1], key[2]
        weighted_errors = []
        norm_cov_losses = []
        all_step_errors = []
        # Evaluate each seed run
        for (seed_val, filepath) in results[key]:
            checkpoint = torch.load(filepath, map_location=device)
            # Reconstruct network architecture:
            layers = checkpoint['layer']
            Nkoopman = state_dim + enc_dim
            model = KoopmanNet(layers, Nkoopman, u_dim)
            model.load_state_dict(checkpoint['model'])
            model.to(device)
            # Evaluate on test data
            weighted_err, step_errs, norm_cov_loss = evaluate_model(model, test_data, u_dim, gamma, state_dim, device)
            weighted_errors.append(weighted_err)
            norm_cov_losses.append(norm_cov_loss)
            all_step_errors.append(step_errs)
        if len(weighted_errors) == 0:
            continue
        avg_weighted_err = np.mean(weighted_errors)
        avg_norm_cov_loss = np.mean(norm_cov_losses)
        avg_step_errors = np.mean(all_step_errors, axis=0)
        summary.append({
            "Environment": env,
            "EncodeDim": enc_dim,
            "CovReg": cov_reg_val,
            "WeightedError": avg_weighted_err,
            "NormalizedCovLoss": avg_norm_cov_loss
        })
        step_error_curves[(env, enc_dim, cov_reg_val)] = avg_step_errors

# Create a summary table (using pandas) and save it as CSV.
df = pd.DataFrame(summary)
print("Summary Table:")
print(df)
table_csv_path = "evaluation_summary.csv"
df.to_csv(table_csv_path, index=False)
print(f"Summary table saved to {table_csv_path}")


Summary Table:
    Environment  EncodeDim  CovReg  WeightedError  NormalizedCovLoss
0   LogisticMap          4       0       0.404513           0.400132
1   LogisticMap        256       0       0.400263           0.000586
2   LogisticMap       1024       0       0.412784           0.003377
3   LogisticMap        256       1       0.399289           0.000039
4   LogisticMap          4       1       0.406822           0.000340
..          ...        ...     ...            ...                ...
65          Go2          4       1       0.192814           0.000002
66          Go2         16       0       0.187583           0.036239
67          Go2        256       0       0.161987           0.000250
68          Go2       1024       1       0.153294           0.000009
69          Go2        256       1       0.161545           0.000046

[70 rows x 5 columns]
Summary table saved to evaluation_summary.csv


In [None]:
# Plot 1: Average Multi-step Prediction Error vs. Encode Dimension

for env in envs:
    df_env = df[df["Environment"] == env]
    if df_env.empty:
        continue
    
    # Create output directory for this environment
    out_dir = os.path.join(project_name, env)
    os.makedirs(out_dir, exist_ok=True)
    
    plt.figure(figsize=(8, 6))
    for cov_reg_val in cov_regs:
        df_subset = df_env[df_env["CovReg"] == cov_reg_val]
        if df_subset.empty:
            continue
        df_subset = df_subset.sort_values(by="EncodeDim")
        x = df_subset["EncodeDim"].values
        y = df_subset["WeightedError"].values
        label = f"CovReg {'on' if cov_reg_val == 1 else 'off'}"
        plt.plot(x, y, marker='o', label=label)
    plt.xscale('log')
    plt.xlabel("Encode Dimension (log scale)")
    plt.ylabel("Average Multi-step Prediction Error (MSE)")
    plt.yscale('log')

    plt.title(f"Multi-step Prediction Error vs. Encode Dimension for {env}")
    plt.legend()
    plt.grid(True, which="both", ls="--")
    plt.tight_layout()
    fig_path = os.path.join(out_dir, f"MultiStepError_{env}.png")
    plt.savefig(fig_path, dpi=300)
    print(f"Saved plot: {fig_path}")
    plt.close()

# Plot 2: Normalized Covariance Loss vs. Encode Dimension

for env in envs:
    df_env = df[df["Environment"] == env]
    if df_env.empty:
        continue
    
    out_dir = os.path.join(project_name, env)
    os.makedirs(out_dir, exist_ok=True)
    
    plt.figure(figsize=(8, 6))
    for cov_reg_val in cov_regs:
        df_subset = df_env[df_env["CovReg"] == cov_reg_val]
        if df_subset.empty:
            continue
        df_subset = df_subset.sort_values(by="EncodeDim")
        x = df_subset["EncodeDim"].values
        y = df_subset["NormalizedCovLoss"].values
        label = f"CovReg {'on' if cov_reg_val == 1 else 'off'}"
        plt.plot(x, y, marker='o', label=label)
    plt.xscale('log')
    plt.xlabel("Encode Dimension (log scale)")
    plt.ylabel("Normalized Covariance Loss")
    plt.title(f"Normalized Covariance Loss vs. Encode Dimension for {env}")
    plt.legend()
    plt.grid(True, which="both", ls="--")
    plt.tight_layout()
    fig_path = os.path.join(out_dir, f"NormalizedCovLoss_{env}.png")
    plt.savefig(fig_path, dpi=300)
    print(f"Saved plot: {fig_path}")
    plt.close()

# Plot 3: Multi-step Loss Curves for each environment

for env in envs:
    out_dir = os.path.join(project_name, env)
    os.makedirs(out_dir, exist_ok=True)
    
    plt.figure(figsize=(10, 8))
    for cov_reg_val in cov_regs:
        for enc_dim in encode_dims:
            key = (env, enc_dim, cov_reg_val)
            if key in step_error_curves:
                step_errs = step_error_curves[key]
                steps_range = np.arange(1, len(step_errs) + 1)
                label = f"Enc: {enc_dim}, CovReg: {'on' if cov_reg_val == 1 else 'off'}"
                plt.plot(steps_range, step_errs, marker='o', label=label)
    plt.xlabel("Prediction Step")
    plt.ylabel("MSE Loss at Step")
    plt.title(f"Multi-step Loss Curves for {env}")
    plt.legend(fontsize='small', ncol=2)
    plt.grid(True, ls="--")
    plt.tight_layout()
    fig_path = os.path.join(out_dir, f"MultiStepLossCurves_{env}.png")
    plt.savefig(fig_path, dpi=300)
    print(f"Saved plot: {fig_path}")
    plt.close()
    
plt.show()


Saved plot: Koopman_Results_Apr_8_2/LogisticMap/MultiStepError_LogisticMap.png
Saved plot: Koopman_Results_Apr_8_2/DampingPendulum/MultiStepError_DampingPendulum.png
Saved plot: Koopman_Results_Apr_8_2/Franka/MultiStepError_Franka.png
Saved plot: Koopman_Results_Apr_8_2/DoublePendulum/MultiStepError_DoublePendulum.png
Saved plot: Koopman_Results_Apr_8_2/Polynomial/MultiStepError_Polynomial.png
Saved plot: Koopman_Results_Apr_8_2/G1/MultiStepError_G1.png
Saved plot: Koopman_Results_Apr_8_2/Go2/MultiStepError_Go2.png
Saved plot: Koopman_Results_Apr_8_2/LogisticMap/NormalizedCovLoss_LogisticMap.png
Saved plot: Koopman_Results_Apr_8_2/DampingPendulum/NormalizedCovLoss_DampingPendulum.png
Saved plot: Koopman_Results_Apr_8_2/Franka/NormalizedCovLoss_Franka.png
Saved plot: Koopman_Results_Apr_8_2/DoublePendulum/NormalizedCovLoss_DoublePendulum.png
Saved plot: Koopman_Results_Apr_8_2/Polynomial/NormalizedCovLoss_Polynomial.png
Saved plot: Koopman_Results_Apr_8_2/G1/NormalizedCovLoss_G1.png
Sav