In [None]:
import itertools
import os
import pickle

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from scipy.integrate import odeint
from scipy.special import binom
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import DataLoader

In [None]:
def generate_pendulum_data(n_ics, sequence_length):
    """
    Simulate random pendulum trajectories, render them with `pendulum_to_movie`
    and slice every trajectory into overlapping windows.

    Args:
        n_ics (int): Number of accepted initial conditions (trajectories).
        sequence_length (int): Number of consecutive time steps per window.

    Returns:
        dict: Arrays under 'x', 'dx', 'ddx' (the three outputs of
              `pendulum_to_movie`), 'z' (angle θ) and 'dz' (angular velocity ω).
              Axis 0 indexes the window start, axis 1 the trajectory and
              axis 2 the position inside the window.
    """

    def pendulum_rhs(state, _time):
        # θ' = ω and ω' = -sin(θ)
        return [state[1], -np.sin(state[0])]

    # Fixed integration grid: step 0.02 on [0, 10)
    time_grid = np.arange(0, 10, .02)
    n_steps = len(time_grid)

    # states[i, j] = [θ, ω]; state_rates[i, j] = [dθ/dt, dω/dt]
    states = np.zeros((n_ics, n_steps, 2))
    state_rates = np.zeros(states.shape)

    # Sampling intervals for the initial angle and angular velocity
    theta_bounds = np.array([-np.pi, np.pi])
    omega_bounds = np.array([-1.8, 1.8])

    accepted = 0
    while accepted < n_ics:
        # Draw θ first, then ω (one uniform draw each)
        initial_state = np.array([
            (theta_bounds[1] - theta_bounds[0]) * np.random.rand() + theta_bounds[0],
            (omega_bounds[1] - omega_bounds[0]) * np.random.rand() + omega_bounds[0]
        ])

        # Reject draws whose |ω²/2 - cos(θ)| exceeds 0.99 and sample again
        energy = initial_state[1]**2 / 2. - np.cos(initial_state[0])
        if np.abs(energy) > 0.99:
            continue

        states[accepted] = odeint(pendulum_rhs, initial_state, time_grid)
        state_rates[accepted] = np.array(
            [pendulum_rhs(states[accepted, step], time_grid[step]) for step in range(n_steps)]
        )
        accepted += 1

    # Render every state as an image (plus the derivative images)
    x, dx, ddx = pendulum_to_movie(states, state_rates)

    # One window per admissible start index along the time axis
    n_windows = n_steps - sequence_length + 1

    def sliding_windows(series):
        return np.array([series[:, start:start + sequence_length] for start in range(n_windows)])

    return {
        'x': sliding_windows(x),
        'dx': sliding_windows(dx),
        'ddx': sliding_windows(ddx),
        'z': sliding_windows(states[:, :, 0]),
        'dz': sliding_windows(states[:, :, 1]),
    }


def pendulum_to_movie(z, dz):
    """
    Render pendulum states as images together with the images' time derivatives.

    Every frame is a Gaussian bump exp(g) on a 51x51 grid covering [-1.5, 1.5]^2,
    with g = -((y1 - cos(θ - π/2))² + (y2 - sin(θ - π/2))²) / 0.05.
    The derivatives follow from the chain rule:

        d/dt   exp(g) = exp(g) * g'
        d²/dt² exp(g) = exp(g) * (g'² + g'')

    Args:
        z (numpy.ndarray): States of shape (n_ics, T, 2); z[..., 0] is θ.
        dz (numpy.ndarray): Time derivatives of `z`, same shape;
            dz[..., 0] is dθ/dt and dz[..., 1] is d²θ/dt².

    Returns:
        x, dx, ddx (numpy.ndarray): Images and their first and second time
        derivatives, each of shape (n_ics, T, 51, 51).
    """

    n_ics, T, _ = z.shape  # Number of initial conditions and time steps
    n = 51  # Image size
    width = .05  # Denominator of the Gaussian exponent

    # Pixel coordinates; y2 runs from +1.5 (top row) to -1.5 (bottom row)
    y1, y2 = np.meshgrid(np.linspace(-1.5, 1.5, n), np.linspace(1.5, -1.5, n))

    x = np.zeros((n_ics, T, n, n))
    dx = np.zeros((n_ics, T, n, n))
    ddx = np.zeros((n_ics, T, n, n))

    for i in range(n_ics):
        for j in range(T):
            theta, dtheta, ddtheta = z[i, j, 0], dz[i, j, 0], dz[i, j, 1]

            cos_phi = np.cos(theta - np.pi/2)
            sin_phi = np.sin(theta - np.pi/2)
            off1 = y1 - cos_phi  # horizontal offset of each pixel from the bob
            off2 = y2 - sin_phi  # vertical offset of each pixel from the bob

            image = np.exp(-(off1**2 + off2**2) / width)

            # g': first time derivative of the exponent
            g_t = -1/width * (
                2 * off1 * sin_phi * dtheta +
                2 * off2 * (-cos_phi) * dtheta
            )

            # g'': second time derivative of the exponent (product rule on g').
            # The previous implementation omitted this term, so ddx was not the
            # second derivative of x.
            g_tt = -2/width * (
                sin_phi * sin_phi * dtheta**2
                + off1 * cos_phi * dtheta**2
                + off1 * sin_phi * ddtheta
                + cos_phi * cos_phi * dtheta**2
                + off2 * sin_phi * dtheta**2
                + off2 * (-cos_phi) * ddtheta
            )

            x[i, j] = image
            dx[i, j] = image * g_t
            ddx[i, j] = image * (g_t**2 + g_tt)

    return x, dx, ddx

In [None]:
class PendulumDataset(Dataset):
    """
    Map-style dataset over windowed pendulum sequences.

    Args:
        data (dict): Arrays stored under 'x', 'dx', 'ddx', 'z' and 'dz'.
        transform (callable, optional): Applied to each sample dict before it is returned.
    """
    def __init__(self, data, transform=None):
        def as_frames(arr):
            # Merge the leading axes into one sample axis and insert a channel axis of size 1:
            # (..., seq_len, height, width) -> (num_samples, seq_len, 1, height, width)
            return arr.reshape(-1, arr.shape[2], 1, arr.shape[3], arr.shape[4])

        def as_latent(arr):
            # (..., seq_len) -> (num_samples, seq_len, 1)
            return arr.reshape(-1, arr.shape[2], 1)

        self.x = as_frames(data['x'])
        self.dx = as_frames(data['dx'])
        self.ddx = as_frames(data['ddx'])
        self.z = as_latent(data['z'])
        self.dz = as_latent(data['dz'])
        self.transform = transform

    def __len__(self):
        # Samples are counted along the first axis of the reshaped 'x' array
        return len(self.x)

    def __getitem__(self, idx):
        # Every field is converted to a float32 tensor on access
        sample = {
            name: torch.tensor(getattr(self, name)[idx], dtype=torch.float32)
            for name in ('x', 'dx', 'ddx', 'z', 'dz')
        }
        return self.transform(sample) if self.transform else sample

In [None]:
def library_size(n, poly_order, use_sine=False, include_constant=True):
    """
    Count the columns of a SINDy library built from `n` variables.

    Adds up the number of monomials of every degree 0..poly_order, then
    adds `n` sine columns if `use_sine` is set and removes the constant
    column if `include_constant` is False.
    """
    # binom(n + k - 1, k) is the number of degree-k monomials in n variables
    n_terms = sum(int(binom(n + degree - 1, degree)) for degree in range(poly_order + 1))
    if use_sine:
        n_terms += n
    if not include_constant:
        n_terms -= 1
    return n_terms


def sindy_library(X, poly_order, include_sine=False):
    """
    Build the NumPy SINDy library for a 2-D data matrix.

    Columns are ordered as: the constant 1, then every monomial of degree
    1..poly_order (degree by degree, index tuples in lexicographic order,
    e.g. x0², x0·x1, x1² for degree 2), then sin(x_i) for every feature if
    `include_sine` is set. The column count equals
    `library_size(n, poly_order, include_sine, True)` for any `poly_order >= 0`;
    earlier versions silently left degrees above 4 unfilled.

    NOTE: a later cell defines a torch function that is also called
    `sindy_library` but takes `(z, dz, poly_order, include_sine)`; once that cell
    has run, the name no longer refers to this function.

    Args:
        X (numpy.ndarray): Input data of shape (num_samples, num_features).
        poly_order (int): Maximum polynomial order to include.
        include_sine (bool): Whether to include sine terms.

    Returns:
        numpy.ndarray: float64 library of shape (num_samples, num_library_terms).
    """
    m, n = X.shape  # Number of samples, number of features

    # Feature-index tuples of every monomial, e.g. (0, 0, 1) stands for x0 * x0 * x1
    monomials = [
        combo
        for degree in range(1, poly_order + 1)
        for combo in itertools.combinations_with_replacement(range(n), degree)
    ]
    n_columns = 1 + len(monomials) + (n if include_sine else 0)

    library = np.ones((m, n_columns))  # Column 0 stays the constant term
    index = 1

    for combo in monomials:
        # Multiply left to right so low orders match the previous nested-loop results
        term = X[:, combo[0]]
        for feature in combo[1:]:
            term = term * X[:, feature]
        library[:, index] = term
        index += 1

    # Sine terms
    if include_sine:
        for i in range(n):
            library[:, index] = np.sin(X[:, i])
            index += 1

    return library


def sindy_fit(RHS, LHS, coefficient_threshold):
    """
    Sparse regression by sequentially thresholded least squares.

    Starts from the dense least-squares solution of RHS @ Xi = LHS and then,
    ten times in a row, zeroes every coefficient whose magnitude is below
    `coefficient_threshold` and refits each target column on the surviving
    library columns only.

    Args:
        RHS (numpy.ndarray): The SINDy library matrix.
        LHS (numpy.ndarray): The derivatives to fit (2-D, one column per target).
        coefficient_threshold (float): Magnitude below which coefficients are pruned.

    Returns:
        numpy.ndarray: The sparse coefficient matrix.
    """
    _, n_targets = LHS.shape
    coeffs = np.linalg.lstsq(RHS, LHS, rcond=None)[0]

    for _ in range(10):
        pruned = np.abs(coeffs) < coefficient_threshold
        coeffs[pruned] = 0
        for col in range(n_targets):
            kept = ~pruned[:, col]
            if not kept.any():
                # Nothing survived for this target; leave its column at zero
                continue
            coeffs[kept, col] = np.linalg.lstsq(RHS[:, kept], LHS[:, col], rcond=None)[0]

    return coeffs


def sindy_simulate(x0, t, Xi, poly_order, include_sine):
    """
    Simulate a discovered SINDy model forward in time.

    Integrates dx/dt = Theta(x) @ Xi with `odeint`, where Theta is the
    single-row library returned by `sindy_library` for the current state.

    Args:
        x0 (array-like): Initial state.
        t (array-like): Time points at which the solution is requested.
        Xi (numpy.ndarray): Discovered SINDy coefficients.
        poly_order (int): Polynomial order used in SINDy.
        include_sine (bool): Whether sine terms were included.

    Returns:
        numpy.ndarray: Simulated state trajectory, one row per entry of `t`.
    """
    # Accept lists/tuples as well as arrays; only the number of states is needed here
    x0 = np.asarray(x0)
    n = x0.size

    def rhs(state, _time):
        # odeint passes the state as a 1-D vector; the library expects a (1, n) matrix
        theta = sindy_library(np.array(state).reshape((1, n)), poly_order, include_sine)
        return np.dot(theta, Xi).reshape((n,))

    return odeint(rhs, x0, t)


def sindy_library_sequence(X_seq, poly_order, include_sine=False):
    """
    Evaluate `sindy_library` on every time step of a batch of sequences.

    Args:
        X_seq (numpy.ndarray): Input data of shape (num_sequences, sequence_length, num_features).
        poly_order (int): Maximum polynomial order to include.
        include_sine (bool): Whether to include sine terms.

    Returns:
        numpy.ndarray: Library of shape (num_sequences, sequence_length, num_library_terms).
    """
    n_seq, seq_len, n_feat = X_seq.shape

    # Stack all time steps into rows, build the library once, then fold back
    stacked_rows = X_seq.reshape(n_seq * seq_len, n_feat)
    stacked_library = sindy_library(stacked_rows, poly_order, include_sine)
    return stacked_library.reshape(n_seq, seq_len, -1)


def sindy_fit_sequence(RHS_seq, LHS_seq, coefficient_threshold):
    """
    Run `sindy_fit` on sequence-shaped data by stacking all time steps as rows.

    Args:
        RHS_seq (numpy.ndarray): Library of shape (num_sequences, sequence_length, num_library_terms).
        LHS_seq (numpy.ndarray): Derivatives of shape (num_sequences, sequence_length, num_features).
        coefficient_threshold (float): The threshold for coefficient pruning.

    Returns:
        numpy.ndarray: The sparse coefficient matrix returned by `sindy_fit`.
    """
    n_seq, seq_len, n_terms = RHS_seq.shape
    _, _, n_targets = LHS_seq.shape
    n_rows = n_seq * seq_len

    # Both operands share the same row layout: one row per (sequence, time step)
    return sindy_fit(
        RHS_seq.reshape(n_rows, n_terms),
        LHS_seq.reshape(n_rows, n_targets),
        coefficient_threshold,
    )

In [None]:
# ==================== CNN + LSTM Encoder ====================
class CNNLSTMEncoder(nn.Module):
    """
    Frame-wise CNN followed by an LSTM whose final hidden state is the latent code.

    Args:
        input_channels (int): Channels per input frame.
        latent_dim (int): Hidden size of the LSTM, i.e. size of the returned code.
    """
    def __init__(self, input_channels=1, latent_dim=1):
        super().__init__()

        # Four stride-2 3x3 convolutions; on 51x51 frames the spatial size goes
        # 51 -> 26 -> 13 -> 7 -> 4
        channel_plan = [input_channels, 16, 32, 64, 128]
        layers = []
        for c_in, c_out in zip(channel_plan[:-1], channel_plan[1:]):
            layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1))
            layers.append(nn.ReLU())
        layers.append(nn.Flatten(start_dim=1))  # (N, C, H, W) -> (N, C*H*W)
        self.encoder_cnn = nn.Sequential(*layers)

        # Probe the CNN with one 51x51 frame to learn the flattened feature width
        probe = torch.zeros(1, input_channels, 51, 51)
        flat_features = self.encoder_cnn(probe).shape[1]

        self.lstm_encoder = nn.LSTM(input_size=flat_features, hidden_size=latent_dim, batch_first=True)

    def forward(self, x):
        """Encode x of shape (batch, seq_len, C, H, W) into a (batch, latent_dim) code."""
        batch_size, seq_len, channels, height, width = x.shape

        # Run the CNN on every frame at once, then restore the time axis
        frames = x.view(batch_size * seq_len, channels, height, width)
        per_frame = self.encoder_cnn(frames).view(batch_size, seq_len, -1)

        # Keep only the final hidden state; drop its leading layer axis of size 1
        _, (hidden, _) = self.lstm_encoder(per_frame)
        return hidden.squeeze(0)

# ==================== CNN + LSTM Decoder ====================
class CNNLSTMDecoder(nn.Module):
    """
    LSTM followed by a transposed-convolution stack that maps a latent code
    back to a sequence of 51x51 frames.

    Args:
        latent_dim (int): Size of the latent code fed to the LSTM at every step.
        input_channels (int): Channels of each reconstructed frame.
    """
    def __init__(self, latent_dim=1, input_channels=1):
        super(CNNLSTMDecoder, self).__init__()

        # Width of the flattened feature vector handed to the CNN (128 maps of 4x4)
        self.cnn_output_size = 128 * 4 * 4

        # LSTM Decoder
        self.lstm_decoder = nn.LSTM(input_size=latent_dim, hidden_size=self.cnn_output_size, batch_first=True)

        # CNN Decoder. For kernel 3, stride 2, padding 1 the output size is
        # 2 * H_in - 1 + output_padding, so the paddings below give 4 -> 7 -> 13 -> 26 -> 51.
        # (output_padding=1 on every layer yields 64x64 instead of 51x51.)
        self.decoder_cnn = nn.Sequential(
            nn.Unflatten(1, (128, 4, 4)),  # (N, 2048) -> (N, 128, 4, 4)
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=0),  # (N, 64, 7, 7)
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=0),  # (N, 32, 13, 13)
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),  # (N, 16, 26, 26)
            nn.ReLU(),
            nn.ConvTranspose2d(16, input_channels, kernel_size=3, stride=2, padding=1, output_padding=0),  # (N, C, 51, 51)
            nn.Sigmoid()  # Normalize output between 0 and 1
        )

    def forward(self, z, seq_len):
        """
        Decode a latent code into `seq_len` frames.

        Args:
            z (torch.Tensor): Latent code of shape (batch, latent_dim).
            seq_len (int): Number of frames to generate.

        Returns:
            torch.Tensor: Frames of shape (batch, seq_len, input_channels, 51, 51).
        """
        batch_size = z.shape[0]

        # Feed the same latent code to the LSTM at every time step
        repeated_z = z.unsqueeze(1).repeat(1, seq_len, 1)

        # LSTM Decoder: (batch, seq_len, cnn_output_size)
        decoded_features, _ = self.lstm_decoder(repeated_z)

        # Decode all frames at once: merge batch and time, keep features flat for Unflatten
        decoded_features = decoded_features.reshape(batch_size * seq_len, self.cnn_output_size)
        frames = self.decoder_cnn(decoded_features)

        # Restore the time axis; channel and spatial sizes are taken from the CNN output
        return frames.view(batch_size, seq_len, *frames.shape[1:])

# ==================== Full CNN + LSTM Autoencoder ====================
class CNNLSTMAutoencoder(nn.Module):
    """
    Pairs a `CNNLSTMEncoder` with a `CNNLSTMDecoder`.

    Args:
        input_channels (int): Channels per frame, passed to both halves.
        latent_dim (int): Size of the latent code, passed to both halves.
    """
    def __init__(self, input_channels=1, latent_dim=1):
        super().__init__()
        self.encoder = CNNLSTMEncoder(input_channels, latent_dim)
        self.decoder = CNNLSTMDecoder(latent_dim, input_channels)

    def forward(self, x):
        """Return (reconstruction, latent code) for a 5-D input sequence."""
        # The decoder is asked for as many frames as the input sequence contains
        _, n_frames, _, _, _ = x.shape
        latent = self.encoder(x)
        reconstruction = self.decoder(latent, n_frames)
        return reconstruction, latent

In [None]:
def physics_loss(z, ddz, lambda_phys=0.1):
    """
    Weighted mean-squared residual of the pendulum equation ddot{z} = -sin(z).

    Args:
        z (torch.Tensor): Latent variable.
        ddz (torch.Tensor): Second time derivative of the latent variable,
            broadcastable against `z`.
        lambda_phys (float): Weight applied to the mean squared residual.

    Returns:
        torch.Tensor: Scalar loss lambda_phys * mean((ddz + sin(z))²).
    """
    # The residual is zero exactly when ddz equals -sin(z)
    residual = torch.sin(z) + ddz
    return lambda_phys * residual.pow(2).mean()


"""
def noether_loss(z, dz, Xi, lambda_noether=0.05):
    # Compute energy
    kinetic_energy = 0.5 * dz**2
    potential_energy = 1 - torch.cos(z)
    total_energy = kinetic_energy + potential_energy

    # Use SINDy to find conservation laws
    Theta = sindy_library(z, dz, poly_order=3, include_sine=True)
    sindy_energy = Theta @ Xi  # Predicted dynamics

    # Enforce that learned dynamics must respect conservation laws
    return lambda_noether * torch.mean((sindy_energy - total_energy) ** 2)

"""


def sindy_loss(x, x_recon, z, dz, ddz, Theta, Xi, lambda_phys=0.1, lambda1=5e-4, lambda2=5e-5, lambda3=1e-5):
    """
    Computes the full loss:
    1. Reconstruction loss: mean((x - x_recon)²)
    2. SINDy first-derivative loss: mean((dz - Theta @ Xi)²)
    3. SINDy second-derivative loss: mean((ddz - Theta @ Xi)²)
    4. SINDy sparsity regularization: mean(|Xi|)
    5. Physics loss enforcing ddot{z} = -sin(z)

    Term 3 used to be built from `torch.autograd.grad(z, x, ...)`. That call
    raises unless `x` requires grad, and its result has the shape of `x`, which
    cannot be compared with `Theta @ Xi`. It is replaced by the residual of the
    second-order latent model.

    NOTE(review): terms 2 and 3 regress two different targets (dz and ddz) on
    the same prediction Theta @ Xi. Term 2 is kept as it was; confirm which
    target the model is meant to predict.

    Args:
        x, x_recon: Original and reconstructed sequences.
        z, dz, ddz: Latent variables and their derivatives.
        Theta: SINDy library matrix.
        Xi: SINDy coefficient matrix.
        lambda_phys: Physics constraint weight.
        lambda1, lambda2, lambda3: Weights of terms 2, 3 and 4.

    Returns:
        tuple: (total_loss, recon_loss, sindy_x_loss, sindy_z_loss,
                sindy_reg_loss, phys_loss). `phys_loss` already includes
                `lambda_phys`.
    """
    # Evaluate the SINDy model once and reuse it for both regression terms
    sindy_prediction = torch.matmul(Theta, Xi)

    recon_loss = torch.mean((x - x_recon) ** 2)
    sindy_x_loss = torch.mean((dz - sindy_prediction) ** 2)
    sindy_z_loss = torch.mean((ddz - sindy_prediction) ** 2)
    sindy_reg_loss = torch.mean(torch.abs(Xi))

    # Add physics loss
    phys_loss = physics_loss(z, ddz, lambda_phys)

    total_loss = recon_loss + lambda1 * sindy_x_loss + lambda2 * sindy_z_loss + lambda3 * sindy_reg_loss + phys_loss
    return total_loss, recon_loss, sindy_x_loss, sindy_z_loss, sindy_reg_loss, phys_loss


In [None]:
def sindy_library(z, dz, poly_order, include_sine=False):
    """
    Constructs the second-order SINDy library from z and its first derivative.

    The features are the concatenation [z, dz] (2 * latent_dim columns). The
    library holds the constant 1, every monomial of degree 1..poly_order in
    those features (degree by degree, index tuples in lexicographic order) and,
    optionally, the sine of every feature. The number of terms equals
    `library_size(2 * latent_dim, poly_order, include_sine, True)` for any
    `poly_order >= 0`; earlier versions ignored degrees above 3.

    NOTE: this definition reuses the name of the NumPy function
    `sindy_library(X, poly_order, include_sine)` from an earlier cell. Once this
    cell has run, `sindy_simulate` and `sindy_library_sequence` resolve the name
    to this torch version.

    Args:
        z (torch.Tensor): Latent variables (batch_size, seq_len, latent_dim)
        dz (torch.Tensor): First derivatives (batch_size, seq_len, latent_dim)
        poly_order (int): Maximum polynomial order to include
        include_sine (bool): Whether to include sine terms

    Returns:
        Theta (torch.Tensor): Constructed SINDy library (batch_size, seq_len, num_library_terms)
    """
    batch_size, seq_len, latent_dim = z.shape
    z_combined = torch.cat([z, dz], dim=-1)  # Combine z and dz
    n_features = 2 * latent_dim

    # Constant term, created with the dtype and device of the data
    library = [torch.ones((batch_size, seq_len, 1), dtype=z_combined.dtype, device=z.device)]

    # Polynomial terms of every degree from 1 to poly_order
    for degree in range(1, poly_order + 1):
        for combo in itertools.combinations_with_replacement(range(n_features), degree):
            # Slices keep the trailing axis so every term has shape (batch, seq, 1)
            term = z_combined[:, :, combo[0]:combo[0] + 1]
            for feature in combo[1:]:
                term = term * z_combined[:, :, feature:feature + 1]
            library.append(term)

    # Optional sine terms
    if include_sine:
        for i in range(n_features):
            library.append(torch.sin(z_combined[:, :, i:i+1]))

    # Stack all terms into a single tensor
    Theta = torch.cat(library, dim=-1)
    return Theta  # Shape: (batch_size, seq_len, num_library_terms)

def z_derivative(z, dt):
    """
    Computes first and second time derivatives of the latent space variable z
    by finite differences along the sequence axis.

    Interior points use central differences. The first derivative uses
    one-sided two-point differences at both ends. The second derivative uses
    one-sided three-point stencils at both ends, which are exact for quadratic
    z. The previous end-point formula, (z[1] - 2*z[0] + z[1]) / dt², equals
    2*(z[1] - z[0]) / dt² and is nonzero even for a straight line.

    Args:
        z (torch.Tensor): Latent representation (batch_size, seq_len, latent_dim)
        dt (float): Time step between frames

    Returns:
        dz (torch.Tensor): First derivative (batch_size, seq_len, latent_dim)
        ddz (torch.Tensor): Second derivative (batch_size, seq_len, latent_dim)

    Raises:
        ValueError: If z is not 3-D or has fewer than 3 time steps.
    """
    if z.dim() != 3 or z.shape[1] < 3:
        raise ValueError(
            f"z must have shape (batch_size, seq_len >= 3, latent_dim), got {tuple(z.shape)}"
        )

    dz = torch.zeros_like(z)
    ddz = torch.zeros_like(z)

    # Compute first derivative using central difference
    dz[:, 1:-1, :] = (z[:, 2:, :] - z[:, :-2, :]) / (2 * dt)
    dz[:, 0, :] = (z[:, 1, :] - z[:, 0, :]) / dt  # Forward difference for first point
    dz[:, -1, :] = (z[:, -1, :] - z[:, -2, :]) / dt  # Backward difference for last point

    # Compute second derivative using central difference
    ddz[:, 1:-1, :] = (z[:, 2:, :] - 2 * z[:, 1:-1, :] + z[:, :-2, :]) / (dt**2)
    # One-sided three-point stencils at the two ends
    ddz[:, 0, :] = (z[:, 2, :] - 2 * z[:, 1, :] + z[:, 0, :]) / (dt**2)
    ddz[:, -1, :] = (z[:, -1, :] - 2 * z[:, -2, :] + z[:, -3, :]) / (dt**2)

    return dz, ddz
    

In [None]:
def train_network(autoencoder, training_loader, val_loader, params, device="cuda"):
    """
    Train the CNN + LSTM Autoencoder with SINDy loss.

    Args:
        autoencoder (nn.Module): Model whose forward pass returns
            (x_recon, z). `z` must be a latent sequence of shape
            (batch_size, seq_len, latent_dim) so that it can be differentiated
            in time.
        training_loader (DataLoader): DataLoader for training data; batches are
            dicts with an 'x' entry.
        val_loader (DataLoader): DataLoader for validation data.
        params (dict): Training parameters. Required keys: "learning_rate",
            "library_dim", "latent_dim", "max_epochs", "dt", "poly_order",
            "include_sine", "print_frequency", "sequential_thresholding",
            "threshold_frequency", "coefficient_threshold". Optional keys:
            "coefficient_mask" (defaults to all ones) and the loss weights
            "loss_weight_physics", "loss_weight_sindy_x", "loss_weight_sindy_z",
            "loss_weight_sindy_regularization" (default to the `sindy_loss`
            defaults). `params["coefficient_mask"]` is overwritten with the
            current mask.
        device (str): Device to use ("cuda" or "cpu").

    Returns:
        results_dict (dict): "num_epochs", "validation_losses" (mean validation
            loss per epoch), "sindy_model_terms" (active coefficients after each
            thresholding) and "sindy_coefficients" (masked Xi as a NumPy array).

    Raises:
        ValueError: If the autoencoder returns a latent tensor that is not 3-D.
    """
    # Move model to device (GPU or CPU)
    autoencoder = autoencoder.to(device)

    # Optimizer
    optimizer = optim.Adam(autoencoder.parameters(), lr=params["learning_rate"])

    # Initialize SINDy coefficients (random initialization)
    library_dim = params['library_dim']
    latent_dim = params['latent_dim']
    Xi = torch.randn((library_dim, latent_dim), requires_grad=True, device=device)  # Learnable coefficients
    Xi_optimizer = optim.Adam([Xi], lr=1e-3)

    # Mask of active coefficients; start with everything active when none is supplied
    coefficient_mask = params.get("coefficient_mask")
    if coefficient_mask is None:
        coefficient_mask = torch.ones((library_dim, latent_dim))
    coefficient_mask = torch.as_tensor(coefficient_mask, dtype=Xi.dtype, device=device)
    params["coefficient_mask"] = coefficient_mask

    # Loss weights come from params, falling back to the sindy_loss defaults
    loss_weights = {
        "lambda_phys": params.get("loss_weight_physics", 0.1),
        "lambda1": params.get("loss_weight_sindy_x", 5e-4),
        "lambda2": params.get("loss_weight_sindy_z", 5e-5),
        "lambda3": params.get("loss_weight_sindy_regularization", 1e-5),
    }
    dt = params["dt"]

    def compute_losses(x, mask):
        # One forward pass plus the full loss tuple, shared by training and validation
        x_recon, z = autoencoder(x)
        if z.dim() != 3:
            raise ValueError(
                "train_network needs a latent sequence of shape (batch_size, seq_len, latent_dim) "
                f"to take time derivatives, but the autoencoder returned shape {tuple(z.shape)}"
            )
        dz, ddz = z_derivative(z, dt)
        Theta = sindy_library(z, dz, params["poly_order"], params["include_sine"])
        # Pruned coefficients are removed from the model by the mask
        return sindy_loss(x, x_recon, z, dz, ddz, Theta, Xi * mask, **loss_weights)

    # Loss tracking
    validation_losses = []
    sindy_model_terms = [torch.sum(coefficient_mask).item()]

    print("TRAINING STARTED...")
    for epoch in range(params["max_epochs"]):

        # Training loop
        autoencoder.train()
        total_loss = 0

        for batch in training_loader:
            x = batch['x'].to(device)  # Input sequence (batch_size, seq_len, 1, 51, 51)

            optimizer.zero_grad()
            Xi_optimizer.zero_grad()

            loss = compute_losses(x, coefficient_mask)[0]

            # Backpropagation
            loss.backward()
            optimizer.step()
            Xi_optimizer.step()  # Optimize Xi separately

            total_loss += loss.item()

        # Validation step: average the total loss over all validation batches
        autoencoder.eval()
        val_total = 0.0

        with torch.no_grad():
            for batch in val_loader:
                x = batch['x'].to(device)
                val_total += compute_losses(x, coefficient_mask)[0].item()

        val_loss = val_total / max(1, len(val_loader))

        # Print progress
        if epoch % params["print_frequency"] == 0:
            print(f"Epoch {epoch}: Train Loss = {total_loss / max(1, len(training_loader))}, Validation Loss = {val_loss}")

        validation_losses.append(val_loss)

        # Apply sequential thresholding every few epochs
        if params["sequential_thresholding"] and epoch % params["threshold_frequency"] == 0 and epoch > 0:
            with torch.no_grad():
                # Thresholding the masked coefficients keeps pruned terms pruned
                coefficient_mask = (torch.abs(Xi * coefficient_mask) > params["coefficient_threshold"]).float()
            params["coefficient_mask"] = coefficient_mask
            print(f"Thresholding Applied: {torch.sum(params['coefficient_mask']).item()} active coefficients")
            sindy_model_terms.append(torch.sum(params["coefficient_mask"]).item())

    # Store results
    results_dict = {
        "num_epochs": params["max_epochs"],
        "validation_losses": np.array(validation_losses),
        "sindy_model_terms": np.array(sindy_model_terms),
        "sindy_coefficients": (Xi * coefficient_mask).detach().cpu().numpy(),
    }

    print("TRAINING COMPLETE!")
    return results_dict

In [None]:
# ==================== DATASET GENERATION ====================
# The generated dataset is cached as a pickle so later runs can skip the simulation.
local_path = r"C:\Users\User\PIML-2\None Video Models and datasets\Nonlinear Pendulum\nonlinearpendulum_dataset.pkl"

# Create the cache folder (and any missing parents) before reading or writing
os.makedirs(os.path.dirname(local_path), exist_ok=True)

if not os.path.exists(local_path):
    # No cache yet: simulate, then store the result for next time
    print("Generating new dataset and saving to local storage...")
    sequence_length = 40  # Number of time steps per sequence
    n_ics = 120  # Number of different pendulum initial conditions
    dataset = generate_pendulum_data(n_ics, sequence_length)

    with open(local_path, "wb") as f:
        pickle.dump(dataset, f)
    print("Dataset saved successfully!")
else:
    # Cache hit. pickle.load executes code from the file, so only load files written by this notebook.
    print("Loading dataset from local storage...")
    with open(local_path, "rb") as f:
        dataset = pickle.load(f)


In [None]:
# ==================== HYPERPARAMETERS ====================
params = {
    "latent_dim": 1,  # Dimension of the latent space (angle representation)
    "model_order": 2,  # Second-order dynamics (ddz term)
    "poly_order": 3,  # Polynomial order for SINDy library
    "include_sine": True,  # Include sine terms in the SINDy library

    # Loss Weights
    "loss_weight_decoder": 1.0,
    "loss_weight_sindy_x": 5e-4,
    "loss_weight_sindy_z": 5e-5,
    "loss_weight_sindy_regularization": 1e-5,
    "loss_weight_physics": 0.1,  # Physics constraint weight

    # Sequential Thresholding
    "sequential_thresholding": True,
    "coefficient_threshold": 0.1,
    "threshold_frequency": 500,

    # Training Parameters
    "batch_size": 3,
    "learning_rate": 1e-4,
    "max_epochs": 5001,
    "print_frequency": 100,
    # Time step for finite differences; must equal the simulation step used in
    # generate_pendulum_data (t = np.arange(0, 10, .02))
    "dt": 0.02,
}

# The torch SINDy library is built from [z, dz], i.e. 2 * latent_dim features, so the
# number of library terms follows from library_size instead of a hand-written constant.
params["library_dim"] = library_size(
    2 * params["latent_dim"], params["poly_order"], params["include_sine"], True
)

# All coefficients start active; train_network reads this mask before the first epoch
params["coefficient_mask"] = torch.ones((params["library_dim"], params["latent_dim"]))

# ==================== DATASET & DATALOADERS ====================
# PyTorch dataset & split
pendulum_dataset = PendulumDataset(dataset)

# Split on the number of samples. `dataset` is a dict, so len(dataset) would only
# count its keys and random_split would reject the lengths.
train_size = int(0.8 * len(pendulum_dataset))  # 80% train, 20% validation
val_size = len(pendulum_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(pendulum_dataset, [train_size, val_size])

# DataLoaders
# NOTE(review): worker processes (num_workers > 0) may not work for classes defined
# inside a notebook on Windows; confirm, or set num_workers=0.
train_loader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=params["batch_size"], shuffle=False, num_workers=4)

print(f"Training Samples: {len(train_dataset)}, Validation Samples: {len(val_dataset)}")

# ==================== AUTOMATIC INPUT DIM DETECTION ====================
sample = pendulum_dataset[0]["x"]
print(f"Sample Shape: {sample.shape}")  # Debugging output

# Correctly infer input_dim based on dataset format
if len(sample.shape) == 2:
    params["input_dim"] = sample.shape[-1]  # If dataset is flattened (num_samples, features)
else:
    params["input_dim"] = sample.shape[1:]  # If dataset is structured as (seq_len, channels, height, width)

print(f"Using input_dim: {params['input_dim']}")  # Debug check

In [None]:
# ==================== MODEL INITIALIZATION ====================
# Prefer the GPU when one is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

autoencoder = CNNLSTMAutoencoder(input_channels=1, latent_dim=params["latent_dim"])
autoencoder = autoencoder.to(device)

# Optimizer for the network weights
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=params["learning_rate"])

# SINDy coefficient matrix with its own optimizer.
# Note: train_network creates its own optimizers and Xi internally and takes none of these as arguments.
Xi = torch.randn(
    (params["library_dim"], params["latent_dim"]),
    requires_grad=True,
    device=device,
)
Xi_optimizer = torch.optim.Adam([Xi], lr=1e-3)