In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dysts.base import DATAPATH_CONTINUOUS, DynSys

from panda.utils.plot_utils import apply_custom_style

In [None]:
# Apply the project's plotting style from the shared YAML config before any figure is created.
# NOTE(review): the path is relative to the notebook's working directory; presumably this
# updates matplotlib's rcParams — confirm against panda.utils.plot_utils.
apply_custom_style("../../config/plotting.yaml")

In [None]:
# Root of the working tree, taken from the WORK environment variable (None when it is unset).
WORK_DIR = os.getenv("WORK")

In [None]:
# Directory (relative to the notebook's working directory) where the figure PDFs are written.
figures_save_dir = "../../figures"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(figures_save_dir, exist_ok=True)

In [None]:
# Fail fast with an actionable message: os.path.join(None, ...) would otherwise raise an
# opaque TypeError when the WORK environment variable is not set.
if WORK_DIR is None:
    raise RuntimeError(
        "The WORK environment variable must be set to locate eval_results/dataset/system_metrics.csv"
    )

# Per-system metrics table that the histogram cells below read.
system_metrics_path = os.path.join(WORK_DIR, "eval_results", "dataset", "system_metrics.csv")

In [None]:
# Load the per-system metrics table; later cells read its max_lyap_r, avg_dt and stiffness columns.
metrics_df = pd.read_csv(system_metrics_path)
# Bare last expression so the notebook renders the frame as the cell output.
metrics_df

In [None]:
# Per-timepoint factor: exponent * timestep.
# NOTE(review): assumes max_lyap_r is the maximal Lyapunov exponent and avg_dt the mean sampling
# interval of each system — confirm against the code that writes system_metrics.csv.
lyap_times = metrics_df.max_lyap_r * metrics_df.avg_dt

# Scale the per-timepoint factor by each prediction horizon (in timepoints), one Series per horizon.
pred_horizons = [128, 256, 512]
scaled_lyap_times = {h: lyap_times * h for h in pred_horizons}

# Shared bin edges across horizons so the overlaid histograms are directly comparable.
min_scaled = min(scaled.min() for scaled in scaled_lyap_times.values())
max_scaled = max(scaled.max() for scaled in scaled_lyap_times.values())
num_bins = 80
# num_bins bins require num_bins + 1 edges (passing num_bins to linspace yields one bin too few).
bins = np.linspace(min_scaled, max_scaled, num_bins + 1)

plt.figure(figsize=(4, 4))
plt.title("Lyapunov Times", fontweight="bold")
plt.ylabel("Count", fontweight="bold")

cmap = plt.get_cmap("cividis")
# Evenly spaced colormap positions in [0.1, 0.9]; np.linspace also handles a single horizon,
# where the manual i / (len - 1) formula would divide by zero.
colors = [cmap(position) for position in np.linspace(0.1, 0.9, len(pred_horizons))]
for i, (h, scaled) in enumerate(scaled_lyap_times.items()):
    # Share of systems whose horizon covers at least one Lyapunov time (NaNs count as "not reached").
    pct = (scaled >= 1.0).mean() * 100
    plt.hist(
        scaled,
        bins=bins,
        histtype="stepfilled",
        log=True,
        alpha=0.6,
        label=rf"$L_{{\mathrm{{pred}}}} = {h}$  ({pct:.0f}% $\geq T_{{\mathrm{{Lyap}}}}$)",
        # Shorter horizons are drawn on top so they are not hidden by the wider distributions.
        zorder=10 - i,
        color=colors[i],
    )

plt.legend()
plt.tight_layout()
save_path = f"{figures_save_dir}/lyapunov_times.pdf"
print(f"Saving figure to {save_path}")
plt.savefig(save_path, bbox_inches="tight")

In [None]:
# log10 is only finite for strictly positive, finite inputs; zeros, negatives, NaNs or infs would
# give a non-finite bin range and make plt.hist raise, so drop them and report how many were dropped.
stiffness = metrics_df.stiffness
valid_stiffness = stiffness[np.isfinite(stiffness) & (stiffness > 0)]
num_dropped = len(stiffness) - len(valid_stiffness)
if num_dropped:
    print(f"Dropping {num_dropped} systems with non-positive or non-finite stiffness")

plt.figure(figsize=(4, 4))
plt.title("Stiffness Score", fontweight="bold")
plt.ylabel("Count", fontweight="bold")
plt.hist(
    np.log10(valid_stiffness),
    histtype="stepfilled",
    bins=60,
    log=True,
    alpha=0.6,
    # Same colour as the first Lyapunov-times histogram (cividis at 0.1), looked up here so
    # this cell does not depend on the `colors` list left behind by another cell.
    color=plt.get_cmap("cividis")(0.1),
)
plt.tight_layout()
save_path = f"{figures_save_dir}/stiffness_score.pdf"
print(f"Saving figure to {save_path}")
plt.savefig(save_path, bbox_inches="tight")

In [None]:
def read_lyapunov_json_to_dataframe(file_path: str, key: str = "4096") -> pd.DataFrame:
    """
    Read Lyapunov exponent data from a JSON file and convert it to a DataFrame.

    The JSON file is expected to have the structure:
    {
        "4096": [
            ["system_name", {"max_lyap_rosenstein": value}],
            ...
        ]
    }

    Args:
        file_path (str): Path to the JSON file containing Lyapunov exponent data
        key (str): Top-level key whose list of records is read. Defaults to "4096".

    Returns:
        pd.DataFrame: DataFrame with columns ['system_name', 'max_lyap_rosenstein'].
            The columns are present even when the record list is empty.

    Raises:
        ValueError: If ``key`` is not a top-level key of the JSON document.
        KeyError: If a record's metrics mapping has no 'max_lyap_rosenstein' entry.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform default encoding.
    with open(file_path, encoding="utf-8") as f:
        data = json.load(f)

    if key not in data:
        raise ValueError(f"Expected '{key}' key in JSON file")

    records = [
        {
            "system_name": system_name,
            "max_lyap_rosenstein": metrics["max_lyap_rosenstein"],
        }
        for system_name, metrics in data[key]
    ]

    # Passing the columns explicitly keeps the schema stable for an empty record list,
    # where pandas would otherwise return a frame with no columns at all.
    return pd.DataFrame(records, columns=["system_name", "max_lyap_rosenstein"])


def extract_system_info(df: pd.DataFrame) -> pd.DataFrame:
    """
    Extract system type and parameter number from system names.

    The system type is everything before the last underscore of the name; the parameter
    number is the integer following the first "pp" that is directly followed by digits
    (e.g. "Lorenz_Rossler_pp12" -> type "Lorenz_Rossler", number 12).

    Args:
        df (pd.DataFrame): DataFrame with 'system_name' column

    Returns:
        pd.DataFrame: Copy of ``df`` with additional columns 'system_type' and 'param_number'

    Raises:
        ValueError: If any system name contains no "pp<digits>" token.
    """
    df = df.copy()
    names = df["system_name"]
    df["system_type"] = names.str.rsplit("_", n=1).str[0]

    # expand=False returns a Series for the single capture group, so a Series (not a
    # one-column DataFrame) is assigned to the new column.
    param_text = names.str.extract(r"pp(\d+)", expand=False)

    # Names without a parameter token would otherwise surface as a cryptic NaN-to-int cast error.
    unparsed = param_text.isna()
    if unparsed.any():
        examples = names[unparsed].tolist()[:5]
        raise ValueError(f"Could not parse a 'pp<number>' parameter index from system names: {examples}")

    df["param_number"] = param_text.astype(int)
    return df


def get_system_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise the Lyapunov exponent estimates per system type.

    Args:
        df (pd.DataFrame): Lyapunov exponent data; 'system_type' is derived from
            'system_name' when the column is not already present

    Returns:
        pd.DataFrame: One row per system type with the columns
            count, mean, std, min, max and median, rounded to 6 decimals
    """
    stat_names = ["count", "mean", "std", "min", "max", "median"]

    frame = df if "system_type" in df.columns else extract_system_info(df)
    exponents_by_type = frame.groupby("system_type")["max_lyap_rosenstein"]

    # Aggregating with a list of names already labels the output columns with those names.
    return exponents_by_type.agg(stat_names).round(6)


def filter_by_system_type(df: pd.DataFrame, system_type: str) -> pd.DataFrame:
    """
    Keep only the rows that belong to one system type.

    Args:
        df (pd.DataFrame): Lyapunov exponent data; 'system_type' is derived from
            'system_name' when the column is not already present
        system_type (str): System type to keep

    Returns:
        pd.DataFrame: Rows whose 'system_type' equals ``system_type``
    """
    frame = extract_system_info(df) if "system_type" not in df.columns else df
    is_requested_type = frame["system_type"].eq(system_type)
    return frame.loc[is_requested_type]


def get_periods_from_systems(file_paths: list[str]) -> dict[str, float]:
    """
    Look up the period of every system found in the given data directories.

    Every immediate subdirectory of each path in ``file_paths`` is treated as a system
    name. A name without an underscore is looked up directly in the dysts continuous
    system metadata; a name of the form "<first>_<second>" gets the larger of the two
    component periods.

    Args:
        file_paths (list[str]): Directories whose subdirectory names are system names.

    Returns:
        dict[str, float]: Dictionary mapping system names (in sorted order) to their periods.

    Raises:
        ValueError: If a system name contains more than one underscore.
        OSError: If one of the directories cannot be listed.
    """
    system_names: set[str] = set()
    for dir_path in file_paths:
        system_names.update(
            entry for entry in os.listdir(dir_path) if os.path.isdir(os.path.join(dir_path, entry))
        )

    # Components are shared between many pair names; remember each base period so the
    # metadata for a component is loaded only once.
    # NOTE(review): assumes load_system_metadata returns the same data on repeated calls — confirm.
    base_periods: dict[str, float] = {}

    def _base_period(name: str) -> float:
        if name not in base_periods:
            metadata = DynSys.load_system_metadata(name, DATAPATH_CONTINUOUS)
            base_periods[name] = metadata["period"]
        return base_periods[name]

    def _skew_period(name: str) -> float:
        first, second = name.split("_")
        return max(_base_period(first), _base_period(second))

    # Sorting makes the key order reproducible; set iteration order varies between runs.
    return {name: _skew_period(name) if "_" in name else _base_period(name) for name in sorted(system_names)}


def augment_with_lyapunov_times(
    df: pd.DataFrame,
    system_periods: dict[str, float],
    period_scaling: float,
    window_sizes: tuple[int, ...] = (1, 128, 256, 512),
) -> pd.DataFrame:
    """
    Augment DataFrame with Lyapunov times in window columns.

    For every window size N a column ``lyap_times_N`` is added, computed as
    N × timestep × max_lyap_rosenstein, where timestep = period × period_scaling.

    Args:
        df (pd.DataFrame): DataFrame with 'system_type' and 'max_lyap_rosenstein' columns
        system_periods (dict[str, float]): Dictionary mapping system types to periods
        period_scaling (float): Scaling factor for period (e.g., 40/4096)
        window_sizes (tuple[int, ...]): Window lengths in timepoints; one output column
            per entry. Defaults to (1, 128, 256, 512).

    Returns:
        pd.DataFrame: Copy of ``df`` with a 'timestep' column and one 'lyap_times_<N>'
            column per window size. System types missing from ``system_periods`` get
            NaN in all of these columns.
    """
    df = df.copy()

    # Timestep (dt) per row; Series.map yields NaN for system types without a known period.
    df["timestep"] = df["system_type"].map(system_periods) * period_scaling

    # Lyapunov times contained in a window of N timepoints: N × dt × max_lyapunov_exponent
    for num_points in window_sizes:
        df[f"lyap_times_{num_points}"] = num_points * df["timestep"] * df["max_lyap_rosenstein"]

    return df


def plot_histogram(
    data: np.ndarray,
    title: str,
    save_path: str | None = None,
    bins: int = 30,
) -> None:
    """
    Draw a histogram of ``data`` on a new 6x4 figure.

    Args:
        data (np.ndarray): Values to bin.
        title (str): Figure title.
        save_path (str | None, optional): When given, the figure is written to this path
            (150 dpi) and closed; when None, the figure is shown instead.
        bins (int, optional): Number of histogram bins. Default is 30.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(data, bins=bins, color="C0", edgecolor="black", alpha=0.8)
    ax.set_title(title)
    ax.set_xlabel("Lyapunov times")
    ax.set_ylabel("Frequency")
    fig.tight_layout()

    if save_path is None:
        plt.show()
        return

    fig.savefig(save_path, dpi=150)
    plt.close(fig)

In [None]:
# Load the per-system exponent estimates and split each name into system type / parameter index.
file_path = "data/max_lyap_r_test_zeroshot.json"
df = read_lyapunov_json_to_dataframe(file_path)
df_with_info = extract_system_info(df)

# Fail fast when WORK is unset: formatting None into the paths would silently produce
# "None/data/..." and only fail later with a misleading missing-directory error.
work_base = os.environ.get("WORK", None)
if work_base is None:
    raise RuntimeError("The WORK environment variable must be set to locate the test_zeroshot data directories")

# Directories whose subdirectory names are the system names to look up periods for.
skew_dir_path = os.path.join(work_base, "data", "improved", "final_skew40", "test_zeroshot")
skew_dir_path_z5_z10 = os.path.join(work_base, "data", "improved", "final_skew40", "test_zeroshot_z5_z10")
base_dir_path = os.path.join(work_base, "data", "improved", "final_base40", "test_zeroshot")
periods = get_periods_from_systems([skew_dir_path, skew_dir_path_z5_z10, base_dir_path])

# Timestep = period * 40 / 4096.
# NOTE(review): presumably 40 periods sampled with 4096 timepoints — confirm against the data generation config.
period_scaling = 40 / 4096
df_augmented = augment_with_lyapunov_times(df_with_info, periods, period_scaling)

print(f"\nAugmented DataFrame shape: {df_augmented.shape}")
print(f"New columns: {list(df_augmented.columns)}")
print("\nFirst few rows with Lyapunov times:")
print(
    df_augmented[
        [
            "system_name",
            "system_type",
            "max_lyap_rosenstein",
            "timestep",
            "lyap_times_1",
            "lyap_times_128",
            "lyap_times_256",
            "lyap_times_512",
        ]
    ].head()
)

# Persist the augmented table so the analysis cells below can start from the CSV.
df_augmented.to_csv("../../data/lyapunov_analysis_data.csv", index=False)

In [None]:
# Reload the augmented table written by the previous cell.
df = pd.read_csv("../../data/lyapunov_analysis_data.csv")

# Bare last expression: the notebook renders the preview as a table (same pattern as metrics_df above).
df.head()

In [None]:
# Upper cut-off used for the plotted histogram of each window column (paired by position).
thresholds = [1, 10, 100, 1000]
columns = ["lyap_times_1", "lyap_times_128", "lyap_times_256", "lyap_times_512"]
# strict=True: a length mismatch between the two lists should fail loudly instead of
# silently skipping columns.
for thresh, col in zip(thresholds, columns, strict=True):
    n = int(col.split("_")[-1])
    # Negative values are clamped to zero; missing values are dropped because
    # np.histogram raises on NaN input.
    data = df[col].clip(lower=0).dropna()
    filtered_data = data[data < thresh]

    print(f"{col}: {len(data)} values, {len(filtered_data)} below {thresh}")

    # NOTE(review): the printed table bins the full clipped data while the plot only shows
    # values below `thresh` — confirm this asymmetry is intended.
    counts, bin_edges = np.histogram(data, bins=10)
    total = counts.sum()
    # Guard the empty-column case, where dividing by a zero total would produce NaNs.
    percentages = counts / total * 100 if total > 0 else np.zeros(len(counts))
    for left, right, count, perc in zip(bin_edges[:-1], bin_edges[1:], counts, percentages):
        print(f"[{left:.2f}, {right:.2f}): Count = {count}, Percentage = {perc:.1f}%")

    plot_histogram(filtered_data, title=f"lyapunov times @ N={n}", bins=1000)