In [None]:
import numpy as np
import tensorflow as tf
from scipy.integrate import solve_ivp
import pickle
import pysindy as ps
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import ParameterGrid
import os
from torchdiffeq import odeint
import matplotlib.pyplot as plt
from torch.fft import fft, ifft
from mpl_toolkits.mplot3d import Axes3D
import optuna
import pandas as pd
import seaborn as sns
import scipy.stats as stats
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import MultiTaskLassoCV, LinearRegression
from sklearn.feature_selection import SelectFromModel
from typing import Tuple, Any, Iterable, Dict
from sklearn.model_selection import train_test_split

In [None]:
# Select the compute device once: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

In [None]:
def generate_lorenz_data(timesteps, dt, initial_conditions, sigma=10, beta=8/3, rho=28):
    """
    Generate Lorenz system data.

    The trajectory is integrated with SciPy's ``solve_ivp`` (RK45) and sampled
    at ``t = 0, dt, 2*dt, ..., (timesteps - 1)*dt``, so consecutive rows are
    exactly ``dt`` apart and the first row is the initial condition.

    :param timesteps: Number of timesteps to simulate.
    :param dt: Time step size.
    :param initial_conditions: Initial conditions (x0, y0, z0).
    :param sigma: Lorenz system parameter.
    :param beta: Lorenz system parameter.
    :param rho: Lorenz system parameter.
    :return: Numpy array of shape (timesteps, 3).
    :raises RuntimeError: If the integrator reports failure.
    """
    def lorenz(t, state):
        # Right-hand side of the Lorenz equations; `t` is unused (autonomous system).
        x, y, z = state
        dxdt = sigma * (y - x)
        dydt = x * (rho - z) - y
        dzdt = x * y - beta * z
        return [dxdt, dydt, dzdt]

    # np.linspace(0, dt * timesteps, timesteps) would space the samples by
    # dt * timesteps / (timesteps - 1); build the grid from dt directly instead.
    t_eval = dt * np.arange(timesteps)
    sol = solve_ivp(lorenz, [0, dt * timesteps], initial_conditions, t_eval=t_eval, method='RK45')
    if not sol.success:
        # A failed run returns a truncated trajectory; surface it instead of
        # letting a short array propagate into the dataset.
        raise RuntimeError(f"Lorenz integration failed: {sol.message}")
    return sol.y.T  # Transpose to shape (timesteps, 3)

# Dataset configuration
timesteps = 7500   # samples per trajectory (extended sequence length for better visualization)
n_samples = 20000  # total number of trajectories
dt = 0.01          # time step size

# Draw one random initial condition per trajectory, uniformly in [-10, 10) on
# each axis; the legacy global seed makes the draw reproducible.
np.random.seed(42)
initial_conditions_list = np.random.uniform(low=-10, high=10, size=(n_samples, 3))

# Integrate every initial condition and stack the results into a single array
# with one trajectory per leading index.
data = np.array(list(map(
    lambda ic: generate_lorenz_data(timesteps, dt, ic),
    initial_conditions_list,
)))

In [None]:
# Split data into train, validation, and test sets (80% / 10% / 10%).
# The boundaries are derived from the number of trajectories rather than being
# hard-coded, so shrinking n_samples for a quick run no longer leaves the
# validation/test sets empty. Integer arithmetic avoids float rounding; for
# 20000 trajectories this gives the same 16000 / 18000 cut points as before.
n_total = len(data)
train_end = n_total * 8 // 10
val_end = n_total * 9 // 10

train_data = data[:train_end]
val_data = data[train_end:val_end]
test_data = data[val_end:]

In [None]:
print("\nExamples from the dataset:")
fig = plt.figure(figsize=(15, 10))

# Draw the first ten trajectories on a 2 x 5 grid of 3-D axes, printing each
# raw array before plotting it.
for idx in range(10):
    trajectory = data[idx]
    print(trajectory)

    xs, ys, zs = trajectory.T  # one row per state variable
    ax = fig.add_subplot(2, 5, idx + 1, projection='3d')
    ax.plot(xs, ys, zs)
    ax.set(title=f"Example {idx + 1}", xlabel="X", ylabel="Y", zlabel="Z")

plt.tight_layout()
plt.show()

In [None]:
batch_size = 64

def prepare_dataloader(data, batch_size, shuffle=True):
    """
    Wrap trajectories in a DataLoader of (input sequence, final state) pairs.

    :param data: Array indexed as (sample, timestep, feature).
    :param batch_size: Number of samples per batch.
    :param shuffle: Whether to reshuffle the samples every epoch. Defaults to
        True (the previous behaviour); pass False for evaluation loaders so
        that batches, and therefore averaged losses, are reproducible.
    :return: DataLoader yielding float32 tensors (inputs, targets).
    """
    inputs = torch.tensor(data[:, :-1, :], dtype=torch.float32)  # All but last timestep as input
    targets = torch.tensor(data[:, -1, :], dtype=torch.float32)  # Last timestep as target
    dataset = TensorDataset(inputs, targets)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Only the training loader is shuffled. Validation and test batches keep a
# fixed order: the loss is a mean of per-batch means, and with a smaller last
# batch it would otherwise change from one pass to the next.
train_loader = prepare_dataloader(train_data, batch_size)
val_loader = prepare_dataloader(val_data, batch_size, shuffle=False)
test_loader = prepare_dataloader(test_data, batch_size, shuffle=False)

In [None]:
class AutoencoderRNN(nn.Module):
    """
    Per-timestep encoder -> LSTM -> decoder mapping a sequence to one vector.

    Each timestep is encoded independently by a small MLP, the encoded
    sequence is run through an LSTM, and the LSTM output at the last timestep
    is decoded into the prediction.

    :param input_dim: Number of features per timestep.
    :param latent_dim: Size of the encoder output fed to the LSTM.
    :param seq_len: Nominal sequence length. Stored on the module only;
        ``forward`` works with whatever length the input actually has.
    :param rnn_hidden_dim: LSTM hidden size.
    :param rnn_layers: Number of stacked LSTM layers.
    :param output_dim: Size of the predicted vector.
    """

    def __init__(self, input_dim, latent_dim, seq_len, rnn_hidden_dim, rnn_layers, output_dim):
        super().__init__()
        self.seq_len = seq_len

        # Encoder: Compress the input features
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, latent_dim),
            nn.ReLU()
        )

        # RNN: LSTM for sequence modeling
        self.rnn = nn.LSTM(
            input_size=latent_dim,
            hidden_size=rnn_hidden_dim,
            num_layers=rnn_layers,
            batch_first=True
        )

        # Decoder: Map the latent representation to the output
        self.decoder = nn.Sequential(
            nn.Linear(rnn_hidden_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        """
        Predict one output vector per sequence.

        :param x: Tensor of shape (batch, seq_len, input_dim).
        :return: Tensor of shape (batch, output_dim).
        :raises ValueError: If ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, input_dim), got {tuple(x.shape)}"
            )

        # nn.Linear acts on the last axis and leaves the leading axes alone, so
        # the encoder can be applied to the whole (batch, seq, feature) tensor.
        # The previous x.view(-1, input_dim) round-trip raised on
        # non-contiguous inputs (e.g. transposed or sliced tensors).
        latent = self.encoder(x)

        # Pass through RNN
        rnn_out, _ = self.rnn(latent)

        # Use the last RNN output for prediction
        final_out = rnn_out[:, -1, :]  # (batch, rnn_hidden_dim)

        # Decode the RNN output
        return self.decoder(final_out)

In [None]:
# Modified Training Loop with Early Stopping
class EarlyStopping:
    """
    Track a validation loss and flag when it has stopped improving.

    :param patience: Number of consecutive non-improving calls after which
        ``early_stop`` becomes True.
    :param min_delta: Minimum decrease below the best loss seen so far that
        counts as an improvement.
    """

    def __init__(self, patience=10, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0                # consecutive calls without improvement
        self.best_loss = float('inf')   # lowest loss seen so far
        self.early_stop = False         # set once patience is exhausted

    def __call__(self, val_loss):
        """
        Record one validation loss.

        :param val_loss: Validation loss of the epoch that just finished.
        :return: True if training should stop, False otherwise. The same value
            is available afterwards as ``self.early_stop``.
        """
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        # Return the flag so `if early_stopping(val_loss):` works; previously
        # this method returned None and that check could never succeed.
        return self.early_stop

In [None]:
# Training and Evaluation
def train_and_evaluate_autoencoder_rnn(model, train_loader, val_loader, epochs, learning_rate):
    """
    Train ``model`` with Adam and MSE loss, validating after every epoch.

    The learning rate is halved every 10 epochs (StepLR) and training stops
    early once the validation loss has not improved for 10 consecutive epochs.

    :param model: Module to train; moved to the GPU when one is available.
    :param train_loader: Iterable of (inputs, targets) training batches.
    :param val_loader: Iterable of (inputs, targets) validation batches.
    :param epochs: Maximum number of epochs.
    :param learning_rate: Initial Adam learning rate.
    :return: Mean validation loss of the last epoch that ran
        (``float('inf')`` if ``epochs`` is 0).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)  # L2 Regularization
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)  # Learning rate scheduler
    early_stopping = EarlyStopping(patience=10)

    # Defined up front so the return below is valid even when no epoch runs.
    val_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

        # Advance the schedule once per epoch; without this call the StepLR
        # scheduler never changes the learning rate.
        scheduler.step()

        # Read the flag rather than the call's return value, so the check
        # works whether or not EarlyStopping.__call__ returns anything.
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

    return val_loss

In [None]:
# Exhaustive grid search: every combination below is trained from scratch and
# the model with the lowest returned validation loss is kept.
param_grid = dict(
    input_dim=[3],                  # Number of input features (e.g., x, y, z)
    latent_dim=[16, 32],            # Latent space dimensionality
    seq_len=[100],                  # Sequence length
    rnn_hidden_dim=[64, 128],       # RNN hidden size
    rnn_layers=[1, 2],              # Number of RNN layers
    output_dim=[3],                 # Output size (e.g., x, y, z)
    learning_rate=[0.001, 0.0005],
    epochs=[50],
)

# Running best across the grid.
best_model, best_params = None, None
best_loss = float('inf')

for params in ParameterGrid(param_grid):
    print(f"Testing params: {params}")

    # Architecture settings go to the constructor; the optimisation settings
    # (epochs, learning_rate) go to the training routine.
    model = AutoencoderRNN(
        params['input_dim'],
        params['latent_dim'],
        params['seq_len'],
        params['rnn_hidden_dim'],
        params['rnn_layers'],
        params['output_dim'],
    )
    val_loss = train_and_evaluate_autoencoder_rnn(
        model,
        train_loader,
        val_loader,
        epochs=params['epochs'],
        learning_rate=params['learning_rate'],
    )

    if val_loss < best_loss:
        best_loss, best_model, best_params = val_loss, model, params

print(f"Best Params: {best_params}, Best Validation Loss: {best_loss:.4f}")


In [None]:
def test_and_compare(best_model, test_loader, dt):
    """
    Evaluate the trained model on the test set and plot a few examples.

    Prints the mean absolute error between the model's predictions and the
    targets, then draws, for up to five test samples, the input trajectory
    together with the true and the predicted target point.

    The earlier version also referenced SINDy predictions, but no SINDy model
    or predictions are produced anywhere in this notebook, so that comparison
    could only raise a NameError and has been removed.

    :param best_model: Trained module mapping an input batch to predictions.
    :param test_loader: Iterable of (inputs, targets) batches, with inputs
        indexed as (batch, timestep, feature) and targets as (batch, feature).
    :param dt: Time step size of the data. Not used by the evaluation; kept so
        existing calls remain valid.
    :raises ValueError: If ``test_loader`` yields no batches.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    best_model.to(device)
    best_model.eval()

    num_samples_to_plot = 5
    all_model_predictions = []
    all_ground_truth = []
    plot_inputs = []   # input trajectories of the first few samples only
    n_kept = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = best_model(inputs.to(device)).cpu().numpy()

            # Collect predictions and ground truth
            all_model_predictions.append(outputs)
            all_ground_truth.append(targets.cpu().numpy())

            # Keep just enough input sequences for the figure; storing every
            # test trajectory a second time would be wasteful.
            if n_kept < num_samples_to_plot:
                kept = inputs[:num_samples_to_plot - n_kept].cpu().numpy()
                plot_inputs.append(kept)
                n_kept += len(kept)

    if not all_model_predictions:
        raise ValueError("test_loader yielded no batches; nothing to evaluate.")

    # Convert lists to arrays
    all_model_predictions = np.concatenate(all_model_predictions, axis=0)
    all_ground_truth = np.concatenate(all_ground_truth, axis=0)
    plot_inputs = np.concatenate(plot_inputs, axis=0)

    # Calculate errors
    model_mae = mean_absolute_error(all_ground_truth, all_model_predictions)
    print(f"Model Mean Absolute Error: {model_mae:.4f}")

    # Visualize comparisons for a few samples. Targets and predictions are
    # single points (one row per sample), so they are drawn as markers on top
    # of the input trajectory instead of being indexed as sequences.
    num_samples_to_plot = min(num_samples_to_plot, len(plot_inputs))
    fig = plt.figure(figsize=(15, 10))
    for i in range(num_samples_to_plot):
        ax = fig.add_subplot(1, num_samples_to_plot, i + 1, projection='3d')

        # Input trajectory leading up to the target
        ax.plot(
            plot_inputs[i, :, 0],
            plot_inputs[i, :, 1],
            plot_inputs[i, :, 2],
            'g',
            linewidth=0.5,
            label="Input Trajectory"
        )
        # Ground truth target point
        ax.scatter(
            all_ground_truth[i:i + 1, 0],
            all_ground_truth[i:i + 1, 1],
            all_ground_truth[i:i + 1, 2],
            color='k',
            marker='o',
            label="Ground Truth"
        )
        # Model prediction for the same sample
        ax.scatter(
            all_model_predictions[i:i + 1, 0],
            all_model_predictions[i:i + 1, 1],
            all_model_predictions[i:i + 1, 2],
            color='b',
            marker='x',
            label="Model Predictions"
        )
        ax.set_title(f"Sample {i + 1}")
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_zlabel("Z")
        ax.legend()

    plt.tight_layout()
    plt.show()


# Evaluate the best model from the grid search on the held-out test set, using
# the test_loader built alongside the train and validation loaders. The call
# matches the function's three-parameter signature.
test_and_compare(best_model, test_loader, dt=dt)