In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_absolute_error, mean_squared_error
import optuna
from typing import Tuple, Any, Iterable, Dict

In [None]:
# Double Pendulum Dynamics
def double_pendulum(t, y, l1, l2, m1, m2, g):
    """Right-hand side of the double-pendulum ODE in first-order form.

    Args:
        t: Current time. Unused; present because ``solve_ivp`` passes it.
        y: State, unpacked as ``theta1, z1, theta2, z2`` where ``z1`` and
            ``z2`` are the time derivatives of ``theta1`` and ``theta2``.
        l1, l2: Lengths of the first and second rod.
        m1, m2: Masses of the first and second bob.
        g: Gravitational acceleration.

    Returns:
        Array shaped like ``y`` holding ``[z1, d(z1)/dt, z2, d(z2)/dt]``.
    """
    theta1, omega1, theta2, omega2 = y
    diff = theta2 - theta1
    sin_diff = np.sin(diff)
    cos_diff = np.cos(diff)
    mass_sum = m1 + m2

    # Both angular accelerations share the same denominator up to l2 / l1.
    den_first = mass_sum * l1 - m2 * l1 * cos_diff ** 2
    den_second = (l2 / l1) * den_first

    num_first = (
        m2 * l1 * omega1 ** 2 * sin_diff * cos_diff
        + m2 * g * np.sin(theta2) * cos_diff
        + m2 * l2 * omega2 ** 2 * sin_diff
        - mass_sum * g * np.sin(theta1)
    )
    num_second = (
        -m2 * l2 * omega2 ** 2 * sin_diff * cos_diff
        + mass_sum * g * np.sin(theta1) * cos_diff
        - mass_sum * l1 * omega1 ** 2 * sin_diff
        - mass_sum * g * np.sin(theta2)
    )

    derivs = np.zeros_like(y)
    derivs[0] = omega1
    derivs[1] = num_first / den_first
    derivs[2] = omega2
    derivs[3] = num_second / den_second
    return derivs

In [None]:
# Dataset size and sampling step.
n_pendulums = 2000  # number of trajectories passed to generate_dataset
dt = 0.01           # time step shared by data generation, training and evaluation

# Seed the RNGs this notebook draws from (NumPy for the simulated pendulums,
# PyTorch for weight initialisation and batch shuffling) so that
# Restart Kernel -> Run All reproduces the same results.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Dataset Generation
def generate_dataset(n_pendulums, dt, t_max=10.0):
    """Simulate random double pendulums and return one-step transition pairs.

    For every pendulum the rod lengths and masses are drawn uniformly from
    [0.5, 2.0] and the initial state uniformly from [-pi, pi]. The trajectory
    is integrated with ``solve_ivp`` (RK45) and sampled every ``dt``.

    Note that the sampled lengths and masses are not returned; only the
    states are.

    Args:
        n_pendulums: Number of trajectories to simulate.
        dt: Spacing between consecutive samples. Must be positive.
        t_max: Simulated duration per trajectory (default 10, as before).

    Returns:
        ``(X_data, y_data)``: two float32 tensors of shape ``(N, 4)`` whose
        rows are ``[theta1, theta2, z1, z2]``; ``y_data[i]`` is the state
        exactly ``dt`` after ``X_data[i]``. ``N`` is
        ``n_pendulums * round(t_max / dt)``.

    Raises:
        ValueError: If ``dt`` is not positive or ``t_max`` is shorter than
            one step.
        RuntimeError: If the ODE solver reports a failure.
    """
    if dt <= 0:
        raise ValueError("dt must be positive")
    n_steps = int(round(t_max / dt))
    if n_steps < 1:
        raise ValueError("t_max must cover at least one step of size dt")

    # Build the grid from multiples of dt. np.linspace(0, t_max, n_steps)
    # would give a spacing of t_max / (n_steps - 1), which is not dt.
    t_eval = dt * np.arange(n_steps + 1)
    t_span = (0.0, t_eval[-1])
    g = 9.81

    inputs, targets = [], []
    for _ in range(n_pendulums):
        l1, l2 = np.random.uniform(0.5, 2.0, 2)
        m1, m2 = np.random.uniform(0.5, 2.0, 2)
        y0 = np.random.uniform(-np.pi, np.pi, 4)

        sol = solve_ivp(double_pendulum, t_span, y0, t_eval=t_eval,
                        args=(l1, l2, m1, m2, g), method='RK45')
        if not sol.success:
            raise RuntimeError(f"solve_ivp failed: {sol.message}")

        # The solver orders the state as (theta1, z1, theta2, z2); the dataset
        # stores angles first, then angular velocities.
        theta1, z1, theta2, z2 = sol.y
        states = np.stack((theta1, theta2, z1, z2), axis=1)
        inputs.append(states[:-1])
        targets.append(states[1:])

    if not inputs:
        return (torch.empty((0, 4), dtype=torch.float32),
                torch.empty((0, 4), dtype=torch.float32))

    X_data = torch.tensor(np.concatenate(inputs), dtype=torch.float32)
    y_data = torch.tensor(np.concatenate(targets), dtype=torch.float32)
    return X_data, y_data

In [None]:
# Simulate every trajectory once. X_data holds the current states and y_data
# the states one sample later; rows are ordered [theta1, theta2, z1, z2].
X_data, y_data = generate_dataset(n_pendulums, dt)

In [None]:
# Sequential 70 / 20 / 10 split without shuffling: the leading rows go to
# training, the next ones to validation and the remainder to testing.
train_size = int(0.7 * len(X_data))
val_size = int(0.2 * len(X_data))
test_size = len(X_data) - train_size - val_size

train_rows = slice(None, train_size)
val_rows = slice(train_size, train_size + val_size)
test_rows = slice(train_size + val_size, None)

train_X, train_y = X_data[train_rows], y_data[train_rows]
val_X, val_y = X_data[val_rows], y_data[val_rows]
test_X, test_y = X_data[test_rows], y_data[test_rows]

# Pair each split's inputs with its targets so a DataLoader can batch them.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(features, targets)
    for features, targets in ((train_X, train_y), (val_X, val_y), (test_X, test_y))
)

In [None]:
# Mini-batch loaders: only the training loader shuffles; validation and test
# loaders iterate in stored order.
batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=batch_size)
    for held_out in (val_dataset, test_dataset)
)

In [None]:
# Energy-Based Loss Function
def energy_loss(pred_coords, pred_coords_dot, true_coords, true_coords_dot, l1, l2, m1, m2, g):
    """Mean squared gap between predicted and reference total energy.

    The total energy (kinetic plus potential) of a double pendulum is
    evaluated for the predicted state and for the reference state, and the
    squared difference is averaged over the batch.

    Args:
        pred_coords, true_coords: Tensors whose columns 0 and 1 are
            ``theta1`` and ``theta2``.
        pred_coords_dot, true_coords_dot: Tensors whose columns 0 and 1 are
            the corresponding angular velocities.
        l1, l2: Rod lengths.
        m1, m2: Bob masses.
        g: Gravitational acceleration.

    Returns:
        Scalar tensor with the mean squared energy difference.
    """
    def total_energy(angles, rates):
        a1, a2 = angles[:, 0], angles[:, 1]
        w1, w2 = rates[:, 0], rates[:, 1]

        kinetic_inner = 0.5 * m1 * (l1 ** 2) * (w1 ** 2)
        kinetic_outer = 0.5 * m2 * (
            (l1 ** 2) * (w1 ** 2)
            + (l2 ** 2) * (w2 ** 2)
            + 2 * l1 * l2 * w1 * w2 * torch.cos(a2 - a1)
        )
        potential_inner = -m1 * g * l1 * torch.cos(a1)
        potential_outer = -m2 * g * (l1 * torch.cos(a1) + l2 * torch.cos(a2))

        return (kinetic_inner + kinetic_outer) + (potential_inner + potential_outer)

    energy_gap = (
        total_energy(pred_coords, pred_coords_dot)
        - total_energy(true_coords, true_coords_dot)
    )
    return energy_gap.pow(2).mean()

In [None]:
# Double Pendulum LNN Class
class LagrangianNN(nn.Module):
    """Fully connected network that maps a state vector to one scalar.

    Two hidden layers of width ``hidden_dim`` with SiLU activations are
    followed by a single linear output unit, used as the Lagrangian.
    """

    def __init__(self, input_dim, hidden_dim):
        """Build the layer stack.

        Args:
            input_dim: Number of input features.
            hidden_dim: Width of both hidden layers.
        """
        super().__init__()
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, 1),  # scalar Lagrangian output
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output of shape ``(batch, 1)`` for input ``x``."""
        return self.net(x)

In [None]:
class DoublePendulumLNN:
    """Training wrapper bundling a LagrangianNN with its optimizer and loss.

    ``predict_next_state``, ``lagrangian_to_accelerations``, ``train`` and
    ``evaluate`` are written in this notebook as module-level functions that
    take the wrapper as their first argument. The methods below forward to
    those functions so that calls such as ``model.train(...)`` and
    ``self.predict_next_state(...)`` resolve. The functions are looked up
    when a method is called, so they may be defined in later cells.
    """

    def __init__(self, hidden_dim=128, lr=1e-3):
        """Create the network, Adam optimizer and MSE criterion.

        Args:
            hidden_dim: Width of the hidden layers of the network.
            lr: Learning rate for Adam (weight decay is fixed at 1e-4).
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = LagrangianNN(input_dim=4, hidden_dim=hidden_dim).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-4)
        self.criterion = nn.MSELoss()

    # Inside a method body a bare name skips the class namespace, so each call
    # below resolves to the module-level function of the same name.
    def predict_next_state(self, *args, **kwargs):
        """Forward to the module-level ``predict_next_state(self, ...)``."""
        return predict_next_state(self, *args, **kwargs)

    def lagrangian_to_accelerations(self, *args, **kwargs):
        """Forward to the module-level ``lagrangian_to_accelerations(self, ...)``."""
        return lagrangian_to_accelerations(self, *args, **kwargs)

    def train(self, *args, **kwargs):
        """Forward to the module-level ``train(self, ...)``."""
        return train(self, *args, **kwargs)

    def evaluate(self, *args, **kwargs):
        """Forward to the module-level ``evaluate(self, ...)``."""
        return evaluate(self, *args, **kwargs)

In [None]:
def predict_next_state(self, coords, coords_dot, dt):
    """Advance ``(coords, coords_dot)`` by one step ``dt`` with classical RK4.

    The integrated system is ``d(coords)/dt = coords_dot`` and
    ``d(coords_dot)/dt = accel(coords, coords_dot)``, where the acceleration
    comes from ``self.lagrangian_to_accelerations`` applied to the output of
    ``self.model``. Each of the four RK4 stages re-evaluates the model at its
    own intermediate state, and positions are advanced with the velocities.

    Autograd is enabled inside this function even when the caller is in a
    ``torch.no_grad()`` block, because the accelerations are obtained by
    differentiating the model output.

    Args:
        self: Object providing ``model`` and ``lagrangian_to_accelerations``.
        coords: Tensor of generalised coordinates, one row per sample. It is
            flagged ``requires_grad`` in place.
        coords_dot: Tensor of generalised velocities with the same shape as
            ``coords``. It is flagged ``requires_grad`` in place.
        dt: Step size.

    Returns:
        ``(next_coords, next_coords_dot)`` with the shapes of the inputs.
    """
    def state_derivative(q, q_dot):
        # Time derivative of (q, q_dot): the velocity and the model acceleration.
        lagrangian = self.model(torch.cat((q, q_dot), dim=1))
        return q_dot, self.lagrangian_to_accelerations(q, q_dot, lagrangian)

    with torch.enable_grad():
        coords.requires_grad_(True)
        coords_dot.requires_grad_(True)

        half = 0.5 * dt
        k1_q, k1_v = state_derivative(coords, coords_dot)
        k2_q, k2_v = state_derivative(coords + half * k1_q, coords_dot + half * k1_v)
        k3_q, k3_v = state_derivative(coords + half * k2_q, coords_dot + half * k2_v)
        k4_q, k4_v = state_derivative(coords + dt * k3_q, coords_dot + dt * k3_v)

        next_coords = coords + dt / 6 * (k1_q + 2 * k2_q + 2 * k3_q + k4_q)
        next_coords_dot = coords_dot + dt / 6 * (k1_v + 2 * k2_v + 2 * k3_v + k4_v)
    return next_coords, next_coords_dot

In [None]:
def lagrangian_to_accelerations(self, coords, coords_dot, L):
    """Solve the Euler-Lagrange equations for the generalised accelerations.

    For a Lagrangian ``L(q, q_dot)`` the equation
    ``d/dt (dL/dq_dot) = dL/dq`` expands to ``H q_ddot + C q_dot = dL/dq``
    with ``H[i, j] = d2L / (dq_dot_i dq_dot_j)`` and
    ``C[i, j] = d2L / (dq_dot_i dq_j)``. The function therefore returns
    ``q_ddot = pinv(H) (dL/dq - C q_dot)`` for every sample in the batch.
    The pseudo-inverse keeps the result finite when ``H`` is singular.

    Args:
        self: Unused.
        coords: Tensor ``q`` of shape ``(batch, n)`` that ``L`` was computed
            from. It is flagged ``requires_grad`` in place.
        coords_dot: Tensor ``q_dot`` of shape ``(batch, n)`` that ``L`` was
            computed from. It is flagged ``requires_grad`` in place.
        L: Lagrangian values, one per sample, still attached to the autograd
            graph of ``coords`` and ``coords_dot``. Each sample's value must
            depend only on that sample's own row.

    Returns:
        Tensor of shape ``(batch, n)`` with the accelerations, differentiable
        with respect to the model parameters.

    Raises:
        ValueError: If ``L`` does not depend on ``coords`` or ``coords_dot``.
    """
    with torch.enable_grad():
        coords.requires_grad_(True)
        coords_dot.requires_grad_(True)

        total = L.sum()
        dL_dq = torch.autograd.grad(total, coords, create_graph=True, allow_unused=True)[0]
        dL_dq_dot = torch.autograd.grad(total, coords_dot, create_graph=True, allow_unused=True)[0]

        if dL_dq is None or dL_dq_dot is None:
            raise ValueError("Gradient calculation returned None. Check inputs and computation graph.")

        def second_derivatives(wrt):
            # Returns a (batch, n, n) tensor whose [b, i, j] entry is
            # d(dL/dq_dot_i) / d(wrt_j) for sample b. A missing dependency
            # (e.g. L linear in q_dot) contributes zeros.
            rows = []
            for i in range(dL_dq_dot.shape[1]):
                component = dL_dq_dot[:, i].sum()
                row = None
                if component.requires_grad:
                    row = torch.autograd.grad(
                        component, wrt, create_graph=True, allow_unused=True
                    )[0]
                rows.append(torch.zeros_like(wrt) if row is None else row)
            return torch.stack(rows, dim=1)

        hessian = second_derivatives(coords_dot)
        mixed = second_derivatives(coords)

        rhs = dL_dq - torch.bmm(mixed, coords_dot.unsqueeze(-1)).squeeze(-1)
        accel = torch.bmm(torch.linalg.pinv(hessian), rhs.unsqueeze(-1)).squeeze(-1)
    return accel

In [None]:
def train(self, train_loader, val_loader, dt, epochs=100, l1=1.0, l2=1.0, m1=1.0, m2=1.0, g=9.81, freeze=False, early_stopping=None):
    """Train ``self.model`` on one-step transitions.

    Each batch is split into coordinates (columns 0-1) and velocities
    (columns 2+). The loss is the MSE on the predicted next coordinates, plus
    the MSE on the predicted next velocities, plus ``energy_loss``. A StepLR
    scheduler halves the learning rate every 10 epochs. After each epoch the
    validation loss is computed and a progress line is printed.

    Args:
        self: Object providing ``model``, ``optimizer``, ``criterion``,
            ``device``, ``predict_next_state`` and ``evaluate``.
        train_loader: Iterable of ``(state, next_state)`` batches.
        val_loader: Loader handed to ``self.evaluate`` after every epoch.
        dt: Step size passed to ``predict_next_state``.
        epochs: Maximum number of epochs.
        l1, l2, m1, m2, g: Physical parameters used by the energy term.
            NOTE(review): these are fixed values; confirm they describe the
            system that produced the data.
        freeze: If True the first layer of ``self.model.net`` is excluded
            from training. If False it is trainable again, which undoes a
            previous call made with ``freeze=True``.
        early_stopping: Optional callable invoked with the validation loss
            after every epoch. Training stops once its ``early_stop``
            attribute is true.
    """
    scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.5)

    # Apply the flag in both directions so that a later call with
    # freeze=False really unfreezes the layer.
    for param in self.model.net[0].parameters():
        param.requires_grad = not freeze

    # Guard the average against an empty loader.
    n_batches = max(len(train_loader), 1)

    for epoch in range(epochs):
        self.model.train()
        total_loss = 0.0
        for coords, true_next_coords in train_loader:
            coords, true_next_coords = coords.to(self.device), true_next_coords.to(self.device)
            coords, coords_dot = coords[:, :2], coords[:, 2:]
            true_coords, true_coords_dot = true_next_coords[:, :2], true_next_coords[:, 2:]

            self.optimizer.zero_grad()
            pred_next_coords, pred_next_coords_dot = self.predict_next_state(coords, coords_dot, dt)

            loss = (
                self.criterion(pred_next_coords, true_coords)
                + self.criterion(pred_next_coords_dot, true_coords_dot)
                + energy_loss(pred_next_coords, pred_next_coords_dot, true_coords, true_coords_dot, l1, l2, m1, m2, g)
            )
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()

        scheduler.step()
        val_loss = self.evaluate(val_loader, dt, l1, l2, m1, m2, g)
        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {total_loss / n_batches:.6f}, Val Loss: {val_loss:.6f}")

        if early_stopping is not None:
            early_stopping(val_loss)
            if early_stopping.early_stop:
                print(f"Early stopping after epoch {epoch + 1}")
                break

In [None]:
def evaluate(self, data_loader, dt, l1=1.0, l2=1.0, m1=1.0, m2=1.0, g=9.81):
    """Return the mean per-batch loss of the model on ``data_loader``.

    The loss is the one used for training: MSE on the predicted next
    coordinates, MSE on the predicted next velocities, and ``energy_loss``.

    The prediction differentiates the model output with autograd, so it is
    run with gradients enabled. Only the loss computation is wrapped in
    ``torch.no_grad()``. No optimizer step is taken.

    Args:
        self: Object providing ``model``, ``criterion``, ``device`` and
            ``predict_next_state``.
        data_loader: Iterable of ``(state, next_state)`` batches that
            supports ``len()``.
        dt: Step size passed to ``predict_next_state``.
        l1, l2, m1, m2, g: Physical parameters used by the energy term.

    Returns:
        Sum of the batch losses divided by the number of batches, or
        ``nan`` when the loader has no batches.
    """
    self.model.eval()
    total_loss = 0.0
    for coords, true_next_coords in data_loader:
        coords, true_next_coords = coords.to(self.device), true_next_coords.to(self.device)
        coords, coords_dot = coords[:, :2], coords[:, 2:]
        true_coords, true_coords_dot = true_next_coords[:, :2], true_next_coords[:, 2:]

        # torch.autograd.grad needs a graph, so gradients must be on here.
        with torch.enable_grad():
            pred_next_coords, pred_next_coords_dot = self.predict_next_state(coords, coords_dot, dt)

        # Drop the graph before scoring; only the values are needed.
        pred_next_coords = pred_next_coords.detach()
        pred_next_coords_dot = pred_next_coords_dot.detach()

        with torch.no_grad():
            loss = (
                self.criterion(pred_next_coords, true_coords)
                + self.criterion(pred_next_coords_dot, true_coords_dot)
                + energy_loss(pred_next_coords, pred_next_coords_dot, true_coords, true_coords_dot, l1, l2, m1, m2, g)
            )
        total_loss += loss.item()

    n_batches = len(data_loader)
    if n_batches == 0:
        return float("nan")
    return total_loss / n_batches

In [None]:
# Early Stopping Class
class EarlyStopping:
    """Tracks the best validation loss and flags when it stops improving.

    Call the instance with each new validation loss. A loss counts as an
    improvement only if it is lower than the best loss so far by more than
    ``min_delta``. After ``patience`` consecutive calls without an
    improvement, ``early_stop`` becomes True and is never reset.
    """

    def __init__(self, patience=10, min_delta=0):
        """Store the settings and start with no recorded loss.

        Args:
            patience: Number of consecutive non-improving calls tolerated.
            min_delta: Margin by which a loss must beat the best loss.
        """
        self.patience, self.min_delta = patience, min_delta
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``counter`` and ``early_stop``."""
        threshold = self.best_loss - self.min_delta
        if val_loss < threshold:
            # Improvement: remember it and restart the patience window.
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

In [None]:
# Hyperparameter Tuning and Training
def tune_hyperparameters(train_loader, val_loader, dt, n_trials=20, epochs=10):
    """Search hidden size and learning rate with Optuna.

    Every trial builds a new ``DoublePendulumLNN``, trains it for ``epochs``
    epochs and is scored by its validation loss, which the study minimises.

    Args:
        train_loader: Training batches for each trial.
        val_loader: Validation batches used during training and for the
            trial score.
        dt: Step size forwarded to ``train`` and ``evaluate``.
        n_trials: Number of Optuna trials (default 20, as before).
        epochs: Training epochs per trial (default 10, as before).

    Returns:
        Dict with the best ``"hidden_dim"`` (int in [64, 256]) and ``"lr"``
        (float in [1e-4, 1e-2], sampled on a log scale).
    """
    def objective(trial):
        hidden_dim = trial.suggest_int("hidden_dim", 64, 256)
        lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
        model = DoublePendulumLNN(hidden_dim=hidden_dim, lr=lr)
        model.train(train_loader, val_loader, dt, epochs=epochs)
        return model.evaluate(val_loader, dt)

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=n_trials)
    print(f"Best Parameters: {study.best_params}")
    return study.best_params

In [None]:
# Hyperparameter tuning: run the Optuna search on the train/validation loaders.
# best_params is a dict with the keys "hidden_dim" and "lr".
best_params = tune_hyperparameters(train_loader, val_loader, dt)

In [None]:
# Build a new model with the tuned hidden size and train it in two 50-epoch stages.
# NOTE(review): the learning rate is fixed at 1e-4 here and best_params['lr']
# is not used; confirm that this is intended.
lnn = DoublePendulumLNN(hidden_dim=best_params['hidden_dim'], lr=1e-4)
lnn.train(train_loader, val_loader, dt, epochs=50, freeze=True)  # Stage 1: train with freeze=True
lnn.train(train_loader, val_loader, dt, epochs=50, freeze=False)  # Stage 2: continue training with freeze=False

In [None]:
# Evaluate on the held-out test split; evaluate returns the loss averaged
# over the batches of the loader.
test_loss = lnn.evaluate(test_loader, dt)
print(f"Test Loss: {test_loss:.6f}")

In [None]:
# Visualization of Double Pendulum as Animation
def visualize_double_pendulum(X_data, X_pred, l1=1.0, l2=1.0, save_path=None):
    """Animate a ground-truth and a predicted double pendulum side by side.

    Args:
        X_data: Ground-truth states, one row per frame; columns 0 and 1 are
            ``theta1`` and ``theta2``. A tensor or array-like.
        X_pred: Predicted states with the same column layout.
        l1, l2: Rod lengths used to convert angles to bob positions.
        save_path: If given, the animation is written to this path at 20 fps
            and the figure is closed; otherwise the figure is shown.

    Returns:
        The ``FuncAnimation`` object, so the caller can keep it alive or
        display it.

    Raises:
        ValueError: If an input is not 2-D with at least two columns, or if
            there are no frames to draw.
    """
    def as_angles(states, label):
        # Accept tensors on any device, with or without grad, and array-likes.
        if torch.is_tensor(states):
            values = states.detach().cpu().numpy()
        else:
            values = np.asarray(states)
        if values.ndim != 2 or values.shape[1] < 2:
            raise ValueError(
                f"{label} must be 2-D with at least two columns (theta1, theta2); "
                f"got shape {values.shape}"
            )
        return values[:, 0], values[:, 1]

    def bob_positions(theta1, theta2):
        # Cartesian positions of both bobs with the pivot at the origin.
        x1 = l1 * np.sin(theta1)
        y1 = -l1 * np.cos(theta1)
        x2 = x1 + l2 * np.sin(theta2)
        y2 = y1 - l2 * np.cos(theta2)
        return x1, y1, x2, y2

    theta1_true, theta2_true = as_angles(X_data, "X_data")
    theta1_pred, theta2_pred = as_angles(X_pred, "X_pred")

    # Only animate frames that exist in both sequences.
    n_frames = min(len(theta1_true), len(theta1_pred))
    if n_frames == 0:
        raise ValueError("nothing to animate: X_data or X_pred is empty")

    x1_true, y1_true, x2_true, y2_true = bob_positions(theta1_true, theta2_true)
    x1_pred, y1_pred, x2_pred, y2_pred = bob_positions(theta1_pred, theta2_pred)

    # Keep the 2.5 limit for the default rods and widen it for longer ones.
    reach = max(2.5, 1.1 * (l1 + l2))

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_xlim(-reach, reach)
    ax.set_ylim(-reach, reach)
    ax.set_aspect('equal')

    line_true, = ax.plot([], [], 'o-', color='blue', label='Ground Truth', lw=2)
    line_pred, = ax.plot([], [], 'o-', color='red', label='Prediction', lw=2)
    ax.legend()

    def init():
        line_true.set_data([], [])
        line_pred.set_data([], [])
        return line_true, line_pred

    def update(frame):
        line_true.set_data([0, x1_true[frame], x2_true[frame]], [0, y1_true[frame], y2_true[frame]])
        line_pred.set_data([0, x1_pred[frame], x2_pred[frame]], [0, y1_pred[frame], y2_pred[frame]])
        return line_true, line_pred

    ani = FuncAnimation(fig, update, frames=n_frames, init_func=init, blit=True, interval=50)

    if save_path:
        ani.save(save_path, fps=20, writer='imagemagick')
        plt.close(fig)
    else:
        plt.show()
    return ani

# Example visualization: compare one-step predictions with the simulated next
# states for the first 200 test samples.
sample_test_X = test_X[:200]
sample_test_true = test_y[:200]

# lnn.model returns the scalar Lagrangian (one column), not a state, so the
# predicted states must come from predict_next_state. That call differentiates
# the model with autograd and therefore cannot run inside torch.no_grad().
sample_inputs = sample_test_X.to(lnn.device)
pred_coords, pred_coords_dot = lnn.predict_next_state(
    sample_inputs[:, :2].clone(), sample_inputs[:, 2:].clone(), dt
)
sample_test_pred = torch.cat((pred_coords, pred_coords_dot), dim=1).detach().cpu()
visualize_double_pendulum(sample_test_true, sample_test_pred, save_path='comparison_double_pendulum.gif')