In [None]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm.notebook import tqdm

from dimp.utils import init_matplotlib, get_colors

from utils import (
    RunMode, get_n_epochs, get_method_run_mode, pickle_name,
    theta_2_dt, Ad_Bd_from_dt, LQs_LRs_from_dt,
    zoh_cost_matrices,
    task_loss, uniform_resampling_loss, substep_loss, evaluate_continuous_cost,
    plot_timegrid, plot_colored, plot_training_res, save_training_res,
    save_pickle, load_pickle,
    extract_trajectory_data, compute_trajectory_metrics,
    plot_density_and_changes, plot_cross_correlations,
)
from pann_clqr import (
    create_pann_clqr,
    create_pann_param_clqr,
    create_pann_param_clqr_2,
    create_exact_param_pann_clqr,
    create_exact_param_pann_clqr_2,
    create_exact_zoh_cost_clqr,
)

# Project-level plotting setup: both helpers are imported from dimp.utils.
# NOTE(review): presumably init_matplotlib() applies shared rcParams and
# get_colors() returns the project palette — confirm in dimp.utils.
init_matplotlib()
colors = get_colors()

# IPython magic: switch to the interactive widget (ipympl) matplotlib backend.
%matplotlib widget

In [None]:
# Notebook-wide run configuration. Every training cell below resolves its own
# mode via get_method_run_mode(run_mode, <key>, run_overrides): in DISPLAY mode
# it loads cached pickles from out_dir, otherwise it trains and saves them.
run_mode = RunMode.DISPLAY      # DISPLAY | TEST | FULL (default for all methods)
run_overrides = {               # Per-method overrides (empty = use run_mode for all)
    # "zoh3": RunMode.FULL,     # Uncomment to rerun only ZOH3
}

n = 160                         # Number of timesteps (default, used by most methods)
n_zoh3 = 80                     # Number of timesteps for ZOH3

n_epochs_override = None        # Set to int to override all methods' epoch counts
out_dir = "data/pann_clqr_dt"   # Directory passed to save_pickle / load_pickle below

# Create the cache directory up front; exist_ok keeps re-runs from failing.
os.makedirs(out_dir, exist_ok=True)

## Example Pannocchia 🌽

Continuous LTI system with dynamics
$$
\dot{s} = A s + B u
$$

3 states and 1 input.

### Define the problem matrices, the initial state, the time horizon, and the LQR matrices.

In [None]:
# Continuous-time LTI system  s_dot = A s + B u  (3 states, 1 input).
A = np.array([
    [-0.1, 0, 0],
    [0, -2, -6.25],
    [0, 4, 0]
])

# Input matrix (column vector: single input acting on the first two states).
B = np.array([[0.25], [2.0], [0.0]])

s0 = np.array([1.344, -4.585, 5.674])   # initial state

T = 10.0        # time window

N = 1000        # max number of timesteps of the OCP

# Quadratic cost weights: Q on the state, R on the input.
Q = 1.0 * np.eye(3)
R = 0.1 * np.eye(1)

u_max = 1.0     # max control input

n_s = 3         # number of states
n_u = 1         # number of inputs

### Create the Optimization Problem

Classic constrained LQR problem.
Discretized with first-order Euler.
$$
\begin{align*}
& \min_{\substack{s_{k+1}, u_k \\ k=0, \dots, N}} \quad & \sum_{k=0}^{N} \left( s_k^T Q s_k + u_k^T R u_k \right), \\
& \text{s.t.} \quad & s_{k+1} = s_k + \Delta t (A s_k + B u_k), \\
& & s_0 = s_{\text{init}}, \\
& & u_k \in U.
\end{align*}
$$

In [None]:
# Baseline problem on the finest uniform grid: N steps of length T / N.
dt_base = T / N
# create_pann_clqr returns the problem plus its state and input variables
# (they are solved and read via .value in the sweep cell below).
pann_clqr, s_base, u_base = create_pann_clqr(N, s0, A, B, Q, R, dt_base, u_max)
# NOTE(review): presumably a CVXPY Problem; is_dpp() then checks DPP compliance — confirm.
assert pann_clqr.is_dpp()

### Study how the solution changes with different number of samples.

With $n=80$ samples the system is unstable.
From $n=160$ timesteps the OCP stabilizes the system.
However, increasing the number of timesteps further improves the solution (see the optimal cost value).

In [None]:
# Sweep over the number of timesteps of the trajectory optimization problem:
# n_test = interval, 2 * interval, ... up to at most N.
interval = 80
n_values = list(range(interval, N + 1, interval))

# Each n_test uses two stacked panels (states above inputs) and two n_test
# values share a row pair, so the grid needs 2 * ceil(len(n_values) / 2) rows.
# The row count is derived from the sweep instead of being hard-coded, so that
# changing N or interval cannot index past the axes grid.
n_rows = 2 * ((len(n_values) + 1) // 2)
row_height = 12.8 / 12      # inches per panel row (12 rows -> original 12.8 in)
fig, axs = plt.subplots(
    n_rows, 2, figsize=(6.4, row_height * n_rows), constrained_layout=True,
)

for i, n_test in enumerate(n_values):
    dt_test = T / n_test
    prob_test, s_test, u_test = create_pann_clqr(n_test, s0, A, B, Q, R, dt_test, u_max)

    # perf_counter is monotonic and high-resolution, unlike the wall clock.
    start_time = time.perf_counter()
    prob_test.solve()
    solve_time = time.perf_counter() - start_time

    print(
        f"n = {n_test}\n"
        f"Optimal cost: {prob_test.objective.value:.4f}, "
        f"solve time meas: {solve_time:.4f} s, "
        f"solve time: {prob_test.solver_stats.solve_time:.4f} s\n"
    )

    times = np.arange(n_test) * dt_test
    s_vec = np.array([sv.value for sv in s_test[1:n_test+1]])
    u_vec = np.array([uv.value for uv in u_test[:n_test]])

    # Panel position: column alternates with i, every two n_test values move
    # down by one row pair.
    row, col = 2 * (i // 2), i % 2

    axs[row, col].plot(times, s_vec, label=['x', 'y', 'z'])
    axs[row, col].set(
        xlabel='Time',
        ylabel='State',
        title=fr"$n={n_test}$",
    )

    axs[row + 1, col].plot(times, u_vec)
    axs[row + 1, col].set(
        xlabel='Time',
        ylabel='Input',
    )

## DQP with Auxiliary Variables

### Create the Parametrized CLQR Problem

Discretize then linearize the (nonlinear) dynamics.
$$
s_{k+1} = \bar{s}_k + \bar{\delta}_k (A \bar{s}_k + B \bar{u}_k) + (I + \bar{\delta}_k A) \tilde{s}_k + \bar{\delta}_k B \tilde{u}_k + (A \bar{s}_k + B \bar{u}_k) \tilde{\delta}_k,
$$
where $\tilde{\square} = \square - \bar{\square}$ and $\delta_k$ is an auxiliary variable that represents the time step lengths.

The optimization vector of the QP is $\begin{bmatrix} \tilde{s}_{k=1,\dots,N} & \tilde{u}_{k=0,\dots,N-1} & \tilde{\delta}_{k=0,\dots,N-1}\end{bmatrix}$.

The QP is parametrized by the parameter vector $\bar{\delta} = \begin{bmatrix} \bar{\delta}_1 & \dots & \bar{\delta}_N \end{bmatrix}^T$.
$$
\begin{align*}
& \min_{\substack{s_{k+1}, u_k, \delta_k \\ k=0, \dots, N}} \quad & \sum_{k=0}^{N} \left(\bar{\delta}_k \left( s_k^T Q s_k + u_k^T R u_k \right) + w_\delta \tilde{\delta}_k^2 \right), \\
& \text{s.t.} \quad & s_{k+1} = \bar{s}_k + \bar{\delta}_k (A \bar{s}_k + B \bar{u}_k) + (I + \bar{\delta}_k A) \tilde{s}_k + \bar{\delta}_k B \tilde{u}_k + (A \bar{s}_k + B \bar{u}_k) \tilde{\delta}_k, \\
& & s_0 = s_{\text{init}}, \\
& & u_k \in U.
\end{align*}
$$

### Task Loss

| Method          | Loss                                                                                                           |
| --------------- | -------------------------------------------------------------------------------------------------------------- |
| Unscaled        | $$\mathcal{L}_1 = \sum_{k=0}^{N} \left(\lVert s_k \rVert^2_Q  + \lVert u_k \rVert^2_R \right)$$                |
| Time scaled     | $$\mathcal{L}_2 = \sum_{k=0}^{N} \delta_k \left(\lVert s_k \rVert^2_Q  + \lVert u_k \rVert^2_R \right)$$       |
| Time bar scaled | $$\mathcal{L}_3 = \sum_{k=0}^{N} \bar{\delta}_k \left(\lVert s_k \rVert^2_Q  + \lVert u_k \rVert^2_R \right)$$ |

### Training Loop

In [None]:
# Training loop for the DQP with auxiliary time-step variables.
# In DISPLAY mode the cached results are loaded; otherwise each loss method is
# trained from a uniform grid and the results are pickled to out_dir.
loss_methods = ["time scaled", "time bar scaled"]

_rm = get_method_run_mode(run_mode, "aux", run_overrides)
if _rm != RunMode.DISPLAY:
    n_epochs = get_n_epochs(_rm, "aux", n_epochs_override)

    history_aux = []
    sol_aux = {}

    # Cost weights as float32 tensors, converted once instead of once per term.
    Q_f32 = torch.tensor(Q, dtype=torch.float32)
    R_f32 = torch.tensor(R, dtype=torch.float32)

    for method in loss_methods:
        print(f"Method: {method}")

        # One scalar parameter per time step, initialised to the uniform grid.
        dt_init = T / n
        dts_torch = [torch.nn.Parameter(torch.ones(1) * dt_init) for _ in range(n)]
        optim = torch.optim.Adam(dts_torch, lr=5e-4)

        # Initial linearization point: states interpolated linearly from s0
        # towards the origin, zero inputs.
        s_bar = [s0 + (np.zeros(n_s) - s0) * i / n for i in range(n)]
        u_bar = [np.zeros(n_u) for _ in range(n)]

        with tqdm(total=n_epochs) as pbar:
            for epoch in range(n_epochs):
                pbar.update(1)
                optim.zero_grad()

                # The layer is rebuilt every epoch because the linearization
                # point (s_bar, u_bar) is passed in at construction time.
                _, layer_aux, _, _, _, dts_params = create_pann_param_clqr(
                    n, s0, A, B, Q, R, s_bar, u_bar, u_max, T,
                )
                sol_aux[method] = layer_aux(*dts_torch)

                # Next linearization point = current solution (detached).
                s_bar = [sol_aux[method][i].detach().numpy() for i in range(n)]
                u_bar = [sol_aux[method][n + i].detach().numpy() for i in range(n)]

                # Aux-specific loss: extract deltas from solution
                # (outputs are read as n states, then n inputs, then n deltas).
                states_sol = [sol_aux[method][i] for i in range(n)]
                inputs_sol = [sol_aux[method][n + i] for i in range(n)]
                deltas_sol = [sol_aux[method][2 * n + i] for i in range(n)]

                if method == "time scaled":
                    # Stage costs weighted by the step lengths returned by the QP.
                    loss = sum(
                        deltas_sol[i] * states_sol[i].t() @ Q_f32 @ states_sol[i]
                        for i in range(n)
                    ) + sum(
                        deltas_sol[i] * inputs_sol[i].t() @ R_f32 @ inputs_sol[i]
                        for i in range(n)
                    )
                elif method == "time bar scaled":
                    # Stage costs weighted by the current parameter values; these
                    # weights are detached, so gradients reach the parameters
                    # only through the QP solution.
                    deltas_bar = np.concatenate([d.detach().numpy() for d in dts_torch])
                    loss = sum(
                        deltas_bar[i] * states_sol[i].t() @ Q_f32 @ states_sol[i]
                        for i in range(n)
                    ) + sum(
                        deltas_bar[i] * inputs_sol[i].t() @ R_f32 @ inputs_sol[i]
                        for i in range(n)
                    )
                else:
                    loss = task_loss(states_sol, inputs_sol, dts_torch, Q, R, method="unscaled")

                loss.backward()
                optim.step()

                # Project back onto the feasible set: clamp EVERY step first,
                # then rescale all of them by one common factor so they sum to T.
                # (Rescaling inside the clamp loop would use a different, partly
                # updated sum for each step and would not restore sum == T.)
                # After rescaling a step may end up slightly outside the clamp
                # bounds; the total-time constraint takes precedence.
                with torch.no_grad():
                    for d in dts_torch:
                        d.clamp_(min=1e-6, max=0.07)
                    scale = T / sum(d.item() for d in dts_torch)
                    for d in dts_torch:
                        d.mul_(scale)

                history_aux.append({
                    'method': method,
                    'loss': loss.item(),
                    # clone(): .numpy() shares memory with the parameter, which
                    # is updated in place; without a copy every history entry
                    # would show the final values.
                    'dts': [d.detach().clone().numpy() for d in dts_torch],
                })

    save_pickle(out_dir, "sol_aux", sol_aux)
    save_pickle(out_dir, "history_aux", history_aux)
else:
    sol_aux = load_pickle(out_dir, "sol_aux")
    history_aux = load_pickle(out_dir, "history_aux")

In [None]:
# Plot and export the training results of every auxiliary-variable method.
for method in loss_methods:
    # History records belonging to this method only.
    hist_m = [rec for rec in history_aux if rec['method'] == method]
    sol_m = sol_aux[method]
    out_name = method.replace(" ", "_")

    plot_training_res(sol_m, hist_m, n, sol_method=1)
    save_training_res("out", out_name, sol_m, hist_m, n, sol_method=1)

## DQP with Reparametrized Parameters

### DQP With Reparametrized Timesteps

Reparametrize time steps with the simplex:
$$
\Delta t_k = \epsilon + (T - n \epsilon) \frac{e^{\theta_k}}{\sum_j e^{\theta_j}}
$$
This enforces both positivity and the total time constraint $\sum_k \Delta t_k = T$ without discontinuous updates.

The optimization vector of the QP is $\begin{bmatrix} \tilde{s}_{k=1,\dots,N} & \tilde{u}_{k=0,\dots,N-1}\end{bmatrix}$.

$$
\begin{align*}
& \min_{\substack{s_{k+1}, u_k \\ k=0, \dots, N}} \quad & \sum_{k=0}^{N} \left(\Delta t_k \left( s_k^T Q s_k + u_k^T R u_k \right)\right), \\
& \text{s.t.} \quad & s_{k+1} = \bar{s}_k + \Delta t_k (A \bar{s}_k + B \bar{u}_k) + (I + \Delta t_k A) \tilde{s}_k + \Delta t_k B \tilde{u}_k, \\
& & s_0 = s_{\text{init}}, \\
& & u_k \in U.
\end{align*}
$$

The OCP uses $\Delta t_k (\theta)$ as a parameter. The optimizer optimizes $\theta$. Gradients do flow through the softmax function in PyTorch.

### Task Loss

| Method       | Loss                                                                                                       |
| ------------ | ---------------------------------------------------------------------------------------------------------- |
| ~~Unscaled~~ | $$\mathcal{L}_1 = \sum_{k=0}^{N} \left(\lVert s_k \rVert^2_Q  + \lVert u_k \rVert^2_R \right)$$            |
| Time scaled  | $$\mathcal{L}_2 = \sum_{k=0}^{N} \Delta t_k \left(\lVert s_k \rVert^2_Q  + \lVert u_k \rVert^2_R \right)$$ |

In [None]:
# Training loop for the DQP with softmax-reparametrized time steps.
loss_methods_2 = ["time scaled"]

_rm = get_method_run_mode(run_mode, "rep", run_overrides)
if _rm == RunMode.DISPLAY:
    # Display only: reuse the cached results.
    history_rep = load_pickle(out_dir, "history_rep")
    sol_rep = load_pickle(out_dir, "sol_rep")
else:
    n_epochs = get_n_epochs(_rm, "rep", n_epochs_override)

    history_rep, sol_rep = [], {}

    for method in loss_methods_2:
        print(f"Method: {method}")

        # Unconstrained parameters; theta_2_dt maps them to the time steps.
        theta = torch.nn.Parameter(torch.ones(n, 1))
        optim = torch.optim.Adam([theta], lr=1e-2)

        _, layer_rep, _, _, _ = create_pann_param_clqr_2(n, s0, A, B, Q, R, u_max)

        with tqdm(total=n_epochs) as pbar:
            for epoch in range(n_epochs):
                pbar.update(1)
                optim.zero_grad(set_to_none=True)

                dts_torch = theta_2_dt(theta, T, n)
                qp_out = layer_rep(dts_torch)
                sol_rep[method] = qp_out

                # Layer outputs are read as n states followed by n inputs.
                states_sol = [qp_out[k] for k in range(n)]
                inputs_sol = [qp_out[n + k] for k in range(n)]

                if method == "time scaled":
                    loss_kind = "time_scaled"
                else:
                    loss_kind = "unscaled"
                loss = task_loss(states_sol, inputs_sol, dts_torch, Q, R, method=loss_kind)
                loss.backward()
                optim.step()

                record = {
                    'method': method,
                    'loss': loss.item(),
                    'dts': dts_torch.detach().numpy(),
                }
                history_rep.append(record)

    save_pickle(out_dir, "history_rep", history_rep)
    save_pickle(out_dir, "sol_rep", sol_rep)

In [None]:
# Plot and export the training results of the reparametrized methods.
for method in loss_methods_2:
    hist_m = [rec for rec in history_rep if rec['method'] == method]
    sol_m = sol_rep[method]
    out_name = method.replace(" ", "_") + "_rep"

    plot_training_res(sol_m, hist_m, n, sol_method=2)
    save_training_res("out", out_name, sol_m, hist_m, n, sol_method=2)

## Loss Hyper Sampling

### Gradient Verification

In [None]:
def test_gradient_flow():
    """Print a gradient check for both hyper-sampling losses.

    Each loss is evaluated on random inputs with n_test = 20 time steps
    obtained from theta via theta_2_dt, back-propagated, and the presence and
    norm of theta's gradient are printed. Nothing is returned or asserted.
    """
    n_test = 20
    theta_test = torch.nn.Parameter(torch.ones(n_test, 1))

    def as_f32(arr):
        # Notebook-level numpy data -> float32 tensor.
        return torch.tensor(arr, dtype=torch.float32)

    A_torch, B_torch, Q_torch, R_torch, s0_torch = map(as_f32, (A, B, Q, R, s0))

    def report(title, loss_value):
        # Shared printout for both approaches.
        print(title)
        print(f"  Loss value: {loss_value.item():.6f}")
        print(f"  theta.grad exists: {theta_test.grad is not None}")
        if theta_test.grad is not None:
            print(f"  theta.grad norm: {theta_test.grad.norm().item():.6f}")

    dts_test = theta_2_dt(theta_test, T, n_test)
    inputs_test = [torch.randn(n_u, requires_grad=True) for _ in range(n_test)]

    # === Uniform Resampling ===
    loss1 = uniform_resampling_loss(
        inputs_test, dts_test, s0_torch,
        A_torch, B_torch, Q_torch, R_torch,
        T=T, n_res=100, use_exact=False,
    )
    loss1.backward()
    report("Approach 1 (Uniform Resampling - Euler):", loss1)

    # Clear all gradients so the second check starts from a clean slate.
    theta_test.grad = None
    for u in inputs_test:
        u.grad = None

    # Rebuild the time steps: the first backward pass consumed their graph.
    dts_test = theta_2_dt(theta_test, T, n_test)

    # === Substeps ===
    loss2 = substep_loss(
        inputs_test, dts_test, s0_torch,
        A_torch, B_torch, Q_torch, R_torch,
        n_sub=10, use_exact=False,
    )
    loss2.backward()
    report("\nApproach 2 (Substeps - Euler):", loss2)

test_gradient_flow()

### Training with Hyper-Sampling (HS) Losses

Problem that HS attempts to solve: the loss is computed only along the trajectory (non-uniform) sampling points. The loss may not reflect the true cost over the entire time horizon.

A possible solution would be to compute the exact cost along the trajectory assuming ZOH inputs. Instead, here, we propose two different strategies.

#### Dense Uniform Grid

The non-uniform grid is replaced with a dense uniform grid. The input is constant within the original non-uniform intervals. The dynamics is integrated with first-order Euler on the dense grid.

**Main problem**: the dense grid is computed with `detach()`, so the gradients do not flow through it.

#### Substeps

Within each original non-uniform interval, we introduce $M$ substeps. The dynamics is integrated with first-order Euler on the substeps.

In [None]:
# Training loop for the hyper-sampling (HS) losses.
loss_methods_hs = ["uniform_resample", "substeps"]

_rm = get_method_run_mode(run_mode, "hs", run_overrides)
if _rm == RunMode.DISPLAY:
    # Display only: reuse the cached results.
    sol_hs = load_pickle(out_dir, "sol_hs")
    history_hs = load_pickle(out_dir, "history_hs")
else:
    n_epochs = get_n_epochs(_rm, "hs", n_epochs_override)
    n_res_hs = 1000     # n_res passed to uniform_resampling_loss
    n_sub_hs = 10       # n_sub passed to substep_loss

    history_hs = []
    sol_hs = {}

    # float32 tensor copies of the problem data for the torch-side losses.
    A_torch, B_torch, Q_torch, R_torch, s0_torch = (
        torch.tensor(arr, dtype=torch.float32) for arr in (A, B, Q, R, s0)
    )

    for method in loss_methods_hs:
        print(f"Training with method: {method}")

        theta = torch.nn.Parameter(torch.ones(n, 1))
        optim = torch.optim.Adam([theta], lr=1e-2)

        _, layer_hs, _, _, _ = create_pann_param_clqr_2(n, s0, A, B, Q, R, u_max)

        with tqdm(total=n_epochs) as pbar:
            for epoch in range(n_epochs):
                pbar.update(1)
                optim.zero_grad(set_to_none=True)

                dts_torch = theta_2_dt(theta, T, n)
                qp_out = layer_hs(dts_torch)
                sol_hs[method] = qp_out

                # Only the inputs (second group of n outputs) feed the HS losses.
                inputs_qp = [qp_out[n + k] for k in range(n)]

                # Positional arguments shared by both loss functions.
                loss_args = (
                    inputs_qp, dts_torch, s0_torch,
                    A_torch, B_torch, Q_torch, R_torch,
                )
                if method == "substeps":
                    loss = substep_loss(*loss_args, n_sub=n_sub_hs, use_exact=False)
                elif method == "uniform_resample":
                    loss = uniform_resampling_loss(
                        *loss_args, T=T, n_res=n_res_hs, use_exact=False,
                    )
                else:
                    raise ValueError(f"Unknown method: {method}")

                loss.backward()
                optim.step()

                record = {
                    'method': method,
                    'epoch': epoch,
                    'loss': loss.item(),
                    'dts': dts_torch.detach().cpu().numpy(),
                }
                history_hs.append(record)

        print(f"  Final loss: {history_hs[-1]['loss']:.6f}\n")

    save_pickle(out_dir, "sol_hs", sol_hs)
    save_pickle(out_dir, "history_hs", history_hs)

In [None]:
# Plot and export the training results of the hyper-sampling methods.
for method in loss_methods_hs:
    hist_m = [rec for rec in history_hs if rec['method'] == method]
    sol_m = sol_hs[method]

    plot_training_res(sol_m, hist_m, n, sol_method=2)
    save_training_res("out", method, sol_m, hist_m, n, sol_method=2)

In [None]:
# Comparison plot: uniform resampling vs substeps
fig, axs = plt.subplots(2, 2, figsize=(9.6, 6.4), constrained_layout=True)
ax_loss, ax_dts = axs[0, 0], axs[0, 1]

# One pass per method: loss curve (top left), final timestep profile
# (top right) and a histogram of the final timesteps (bottom row, column i).
for i, method in enumerate(loss_methods_hs):
    history_method = [h for h in history_hs if h['method'] == method]
    d_arr = history_method[-1]['dts']
    times = np.cumsum(d_arr)

    ax_loss.plot([h['loss'] for h in history_method], label=method)
    ax_dts.plot(times, d_arr, label=method)

    ax_hist = axs[1, i]
    ax_hist.hist(d_arr.flatten(), bins=30, alpha=0.7, edgecolor='black')
    ax_hist.set_xlabel("Timestep duration")
    ax_hist.set_ylabel("Count")
    ax_hist.set_title(f"Histogram: {method}")
    # Reference line at the uniform step length.
    ax_hist.axvline(T/n, color='r', linestyle='--', label=f"uniform={T/n:.4f}")
    ax_hist.legend()

ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.set_title("Loss Convergence")
ax_loss.legend()

ax_dts.set_xlabel("Time")
ax_dts.set_ylabel("Timestep duration")
ax_dts.set_title("Final Timestep Distributions")
ax_dts.legend()

fig.suptitle("Comparison: Uniform Resampling vs Substeps", fontsize=14)
plt.show()

In [None]:
# Evaluate "True" Continuous-Time Cost
# For each hyper-sampling method, re-evaluate the final inputs and timesteps
# with evaluate_continuous_cost and print it next to the final training loss.
print("=== True Continuous-Time Cost Comparison ===\n")

for method in loss_methods_hs:
    sol = sol_hs[method]
    history_method = [h for h in history_hs if h['method'] == method]
    dts_final = history_method[-1]['dts']
    inputs_qp = [sol[n + i] for i in range(n)]

    true_cost = evaluate_continuous_cost(inputs_qp, dts_final, s0, A, B, Q, R, T)

    print(f"{method}:")
    print(f"  Training loss (final): {history_method[-1]['loss']:.4f}")
    print(f"  True continuous cost:  {true_cost:.4f}")
    print(f"  dt range: [{np.min(dts_final):.5f}, {np.max(dts_final):.5f}]")
    print(f"  dt std:   {np.std(dts_final):.5f}")
    print()

print("=== Comparison with Other Methods ===\n")

# The comparisons below are best-effort: they depend on results produced by
# other cells. A failure is reported instead of being silently swallowed, and
# only Exception is caught so that KeyboardInterrupt still stops the cell.
try:
    for method in loss_methods_2:
        sol = sol_rep[method]
        history_method = [h for h in history_rep if h['method'] == method]
        dts_final = history_method[-1]['dts']
        inputs_qp = [sol[n + i] for i in range(n)]
        true_cost = evaluate_continuous_cost(inputs_qp, dts_final, s0, A, B, Q, R, T)
        print(f"Reparametrized ({method}): true_cost = {true_cost:.4f}")
except Exception as exc:
    print(f"Reparametrized: skipped ({type(exc).__name__}: {exc})")

# NOTE: the ZOH results are defined in a cell further down the notebook, so on
# a fresh top-to-bottom run this comparison is skipped with a NameError message.
try:
    for method in loss_methods_zoh:
        sol = sol_zoh[method]
        history_method = [h for h in history_zoh if h['method'] == method]
        dts_final = history_method[-1]['dts']
        inputs_qp = [sol[n + i] for i in range(n)]
        true_cost = evaluate_continuous_cost(inputs_qp, dts_final, s0, A, B, Q, R, T)
        print(f"ZOH ({method}): true_cost = {true_cost:.4f}")
except Exception as exc:
    print(f"ZOH: skipped ({type(exc).__name__}: {exc})")

## DQP With ZOH Exact Discretization

Use the exact discretization of the LTI system with zero-order hold (ZOH).

OCP parameters:
$$
A_{d, k}, B_{d, k}
= \operatorname{ZOH}(A, B, \Delta t_k)
= \exp \left( \begin{bmatrix} A & B \\ 0 & 0 \end{bmatrix} \Delta t_k \right)
= \begin{bmatrix} A_{d, k} & B_{d, k} \\ 0 & I \end{bmatrix}
$$

### No Cost Scaling With Interval Duration

In [None]:
# Training loop for the DQP with exact ZOH-discretized dynamics.
loss_methods_zoh = ["time scaled"]

_rm = get_method_run_mode(run_mode, "zoh", run_overrides)
if _rm == RunMode.DISPLAY:
    # Display only: reuse the cached results.
    sol_zoh = load_pickle(out_dir, "sol_zoh")
    history_zoh = load_pickle(out_dir, "history_zoh")
else:
    n_epochs = get_n_epochs(_rm, "zoh", n_epochs_override)
    history_zoh, sol_zoh = [], {}

    for method in loss_methods_zoh:
        print(f"ZOH Method: {method}")

        theta = torch.nn.Parameter(torch.ones(n, 1))
        optim = torch.optim.Adam([theta], lr=1e-2)

        _, layer_zoh, _, _, _, _ = create_exact_param_pann_clqr(
            n, s0, n_s, n_u, Q, R, u_max,
        )

        with tqdm(total=n_epochs) as pbar:
            for epoch in range(n_epochs):
                pbar.update(1)
                optim.zero_grad(set_to_none=True)

                dts_torch = theta_2_dt(theta, T, n)

                # One (Ad, Bd) pair per time step, regrouped into two sequences.
                Ad_list, Bd_list = zip(*(Ad_Bd_from_dt(dt_k, A, B) for dt_k in dts_torch))

                qp_out = layer_zoh(*Ad_list, *Bd_list)
                sol_zoh[method] = qp_out

                # Layer outputs are read as n states followed by n inputs.
                states_sol = [qp_out[k] for k in range(n)]
                inputs_sol = [qp_out[n + k] for k in range(n)]

                if method == "time scaled":
                    loss_kind = "time_scaled"
                else:
                    loss_kind = "unscaled"
                loss = task_loss(states_sol, inputs_sol, dts_torch, Q, R, method=loss_kind)
                loss.backward()
                optim.step()

                record = {
                    "method": method,
                    "epoch": epoch,
                    "loss": float(loss.item()),
                    "dts": dts_torch.detach().cpu().numpy(),
                }
                history_zoh.append(record)

    save_pickle(out_dir, "sol_zoh", sol_zoh)
    save_pickle(out_dir, "history_zoh", history_zoh)

In [None]:
# Plot and export the training results of the ZOH methods.
for method in loss_methods_zoh:
    hist_m = [rec for rec in history_zoh if rec['method'] == method]
    sol_m = sol_zoh[method]
    out_name = method.replace(" ", "_") + "_zoh"

    plot_training_res(sol_m, hist_m, n, sol_method=2)
    save_training_res("out", out_name, sol_m, hist_m, n, sol_method=2)

### With Cost Scaling With Interval Duration

In [None]:
# Training loop for the ZOH DQP whose cost factors (LQs, LRs) also depend on
# the time steps, followed by plotting/export of the results.
loss_methods_zoh_2 = ["time scaled"]

_rm = get_method_run_mode(run_mode, "zoh2", run_overrides)
if _rm == RunMode.DISPLAY:
    # Display only: reuse the cached results.
    sol_zoh_2 = load_pickle(out_dir, "sol_zoh_2")
    history_zoh_2 = load_pickle(out_dir, "history_zoh_2")
else:
    n_epochs = get_n_epochs(_rm, "zoh2", n_epochs_override)
    history_zoh_2, sol_zoh_2 = [], {}

    for method in loss_methods_zoh_2:
        print(f"ZOH2 Method: {method}")

        theta = torch.nn.Parameter(torch.ones(n, 1))
        optim = torch.optim.Adam([theta], lr=1e-2)

        _, layer_zoh2, _, _, _, _, _, _ = create_exact_param_pann_clqr_2(
            n, s0, n_s, n_u, u_max,
        )

        with tqdm(total=n_epochs) as pbar:
            for epoch in range(n_epochs):
                pbar.update(1)
                optim.zero_grad(set_to_none=True)

                dts_torch = theta_2_dt(theta, T, n)

                # Per-step dynamics and cost parameters derived from the time steps.
                Ad_list, Bd_list = zip(*(Ad_Bd_from_dt(dt_k, A, B) for dt_k in dts_torch))
                LQs_list, LRs_list = LQs_LRs_from_dt(dts_torch, Q, R)

                qp_out = layer_zoh2(*Ad_list, *Bd_list, *LQs_list, *LRs_list)
                sol_zoh_2[method] = qp_out

                # Layer outputs are read as n states followed by n inputs.
                states_sol = [qp_out[k] for k in range(n)]
                inputs_sol = [qp_out[n + k] for k in range(n)]

                if method == "time scaled":
                    loss_kind = "time_scaled"
                else:
                    loss_kind = "unscaled"
                loss = task_loss(states_sol, inputs_sol, dts_torch, Q, R, method=loss_kind)
                loss.backward()
                optim.step()

                record = {
                    "method": method,
                    "epoch": epoch,
                    "loss": float(loss.item()),
                    "dts": dts_torch.detach().cpu().numpy(),
                }
                history_zoh_2.append(record)

    save_pickle(out_dir, "sol_zoh_2", sol_zoh_2)
    save_pickle(out_dir, "history_zoh_2", history_zoh_2)

# Plot and export the results (runs in every mode).
for method in loss_methods_zoh_2:
    hist_m = [rec for rec in history_zoh_2 if rec['method'] == method]
    sol_m = sol_zoh_2[method]
    out_name = method.replace(" ", "_") + "_zoh_2"

    plot_training_res(sol_m, hist_m, n, sol_method=2)
    save_training_res("out", out_name, sol_m, hist_m, n, sol_method=2)

## DQP with Exact ZOH Discretization and Exact Integrated Cost

This section implements the approach from Pannocchia et al.: **exact ZOH discretization** of both dynamics and quadratic stage cost, with all nonlinear dependence on $\Delta t_k$ computed outside the QP.

### Exact ZOH Integrated Stage Cost

Under ZOH, the state evolves on $[t_k, t_{k+1}]$ as $x(\tau) = e^{A\tau} x_k + \Gamma(\tau) u_k$ where $\Gamma(\tau) = \int_0^\tau e^{As} B\, ds$.
The exact integrated cost over interval $k$ is:
$$\ell_k = \int_0^{\Delta t_k} \bigl(x(\tau)^T Q\, x(\tau) + u_k^T R\, u_k\bigr)\, d\tau = z_k^T \bar{W}_k z_k$$

where $z_k = \begin{bmatrix} x_k \\ u_k \end{bmatrix}$ and $\bar{W}_k = \begin{bmatrix} Q_{d,k} & M_{d,k} \\ M_{d,k}^T & R_{d,k} \end{bmatrix} \succeq 0$ with:
- $Q_{d,k} = \int_0^{\Delta t_k} e^{A^T\tau} Q\, e^{A\tau}\, d\tau$ (observability-like Gramian)
- $M_{d,k} = \int_0^{\Delta t_k} e^{A^T\tau} Q\, \Gamma(\tau)\, d\tau$ (cross term)
- $R_{d,k} = \int_0^{\Delta t_k} \Gamma(\tau)^T Q\, \Gamma(\tau)\, d\tau + \Delta t_k R$

### Van Loan Block Matrix Exponential

Define $\hat{A} = \begin{bmatrix} A & B \\ 0 & 0 \end{bmatrix}$ and $\hat{Q} = \begin{bmatrix} Q & 0 \\ 0 & 0 \end{bmatrix}$. The cost-related integrals form the observability Gramian $W^Q = \int_0^{\Delta t} e^{\hat{A}^T \tau} \hat{Q}\, e^{\hat{A}\tau}\, d\tau$, extracted via:
$$\exp\left(\begin{bmatrix} -\hat{A}^T & \hat{Q} \\ 0 & \hat{A} \end{bmatrix} \Delta t\right) = \begin{bmatrix} \star & e^{-\hat{A}^T \Delta t}\, W^Q \\ 0 & e^{\hat{A}\Delta t} \end{bmatrix}$$

From the bottom-right block we also read $A_d$ and $B_d$. The full cost matrix is $\bar{W} = W^Q + \text{diag}(0, \Delta t\, R)$.

### Why This Stays a QP and Parameter-Affine

The matrices $(\bar{W}_k, A_{d,k}, B_{d,k})$ depend nonlinearly on $\Delta t_k$ through `torch.matrix_exp`, but they are computed **outside** the QP. Inside the QP, they are fixed parameters:
$$\min_{x,u}\;\sum_k \|L_k^T z_k\|^2 \quad \text{s.t. } x_{k+1} = A_{d,k} x_k + B_{d,k} u_k,\; |u_k| \le u_{\max}$$

where $L_k = \text{chol}(\bar{W}_k)$ is a parameter matrix. The expression $L_k^T z_k$ is **affine in both parameters and variables**, so `cp.sum_squares(...)` is DPP-compliant in CVXPY. All nonlinearity in $\Delta t$ resides in the parameter computation, preserving the QP structure.

**Note on terminal cost:** No terminal cost $x_N^T P x_N$ is included, consistent with the other implementations in this notebook.

In [None]:
# Gradient verification for exact ZOH cost pipeline
def test_gradient_flow_zoh3(n_test=20, n_fd=5, eps=1e-4, seed=None):
    """Verify gradients flow through the full pipeline:
    theta -> dt -> (Ad,Bd,W) via matrix_exp -> QP via CvxpyLayer -> loss.

    The autodiff gradient of the loss with respect to ``theta`` is printed and
    a few of its components are compared against central finite differences.

    Args:
        n_test: Number of timesteps of the small test problem.
        n_fd: Maximum number of gradient components to compare against
            finite differences; capped at ``n_test``.
        eps: Perturbation applied to ``theta`` in the central difference.
        seed: Optional integer seed for selecting the compared components.
            ``None`` (default) draws them from torch's global RNG.

    Returns:
        None. All results are printed.

    NOTE(review): every tensor here is float32 and the finite-difference loss
    comes from a numerical QP solve, so the finite-difference column is
    presumably noisy -- treat the relative error as a rough sanity check.
    """
    A_t = torch.tensor(A, dtype=torch.float32)
    B_t = torch.tensor(B, dtype=torch.float32)
    Q_t = torch.tensor(Q, dtype=torch.float32)
    R_t = torch.tensor(R, dtype=torch.float32)
    s0_t = torch.tensor(s0, dtype=torch.float32)

    # Build a small CvxpyLayer for n_test (only the second returned object is used)
    _, layer_t, _, _, _, _, _, _ = create_exact_zoh_cost_clqr(
        n_test, s0, n_s, n_u, u_max,
    )

    def _qp_parameters(dts):
        """Return per-interval lists (Ad, Bd, Lx, Lu, W) for interval lengths ``dts``."""
        Ad_l, Bd_l, Lx_l, Lu_l, W_l = [], [], [], [], []
        for k in range(n_test):
            Ad_k, Bd_k, W_k = zoh_cost_matrices(dts[k], A_t, B_t, Q_t, R_t)
            Ad_l.append(Ad_k)
            Bd_l.append(Bd_k)
            W_l.append(W_k)
            # W_k = L L^T with L lower-triangular, hence z^T W_k z = ||L^T z||^2.
            # L^T is split column-wise into its state part and its input part.
            LT_k = torch.linalg.cholesky(W_k).T
            Lx_l.append(LT_k[:, :n_s])
            Lu_l.append(LT_k[:, n_s:])
        return Ad_l, Bd_l, Lx_l, Lu_l, W_l

    def _stage_costs(sol, W_l):
        """Return the list of z_k^T W_k z_k terms along the layer output ``sol``."""
        costs = []
        for k in range(n_test):
            # As indexed here: sol[k - 1] is the state at step k >= 1 and
            # sol[n_test + k] is the input at step k; step 0 starts from s0.
            s_k = s0_t if k == 0 else sol[k - 1]
            u_k = sol[n_test + k]
            z_k = torch.cat([s_k, u_k])
            costs.append(z_k @ W_l[k] @ z_k)
        return costs

    def _perturbed_loss(idx, delta):
        """Loss (as a Python float) after adding ``delta`` to component ``idx`` of theta."""
        theta_pert = theta_test.detach().clone()
        # view(-1) is guaranteed to alias theta_pert, so the in-place add sticks.
        theta_pert.view(-1)[idx] += delta
        Ad_p, Bd_p, Lx_p, Lu_p, W_p = _qp_parameters(
            theta_2_dt(theta_pert, T, n_test)
        )
        with torch.no_grad():
            sol_p = layer_t(*Ad_p, *Bd_p, *Lx_p, *Lu_p)
        return sum(float(c) for c in _stage_costs(sol_p, W_p))

    # --- Autodiff pass ---------------------------------------------------
    theta_test = torch.nn.Parameter(torch.ones(n_test, 1))
    dts_t = theta_2_dt(theta_test, T, n_test)
    Ad_l, Bd_l, Lx_l, Lu_l, W_l = _qp_parameters(dts_t)
    sol_t = layer_t(*Ad_l, *Bd_l, *Lx_l, *Lu_l)

    loss = torch.tensor(0.0, dtype=torch.float32)
    for stage_cost in _stage_costs(sol_t, W_l):
        loss = loss + stage_cost

    loss.backward()

    grad = theta_test.grad
    print("Exact ZOH Integrated Cost - Gradient Check:")
    print(f"  n_test = {n_test}")
    print(f"  Loss value: {loss.item():.6f}")
    print(f"  theta.grad exists: {grad is not None}")
    if grad is None:
        # Nothing to compare against: stop instead of failing on None below.
        print("  No gradient reached theta; skipping finite-difference check.")
        return
    print(f"  theta.grad norm: {grad.norm().item():.6f}")
    print(f"  theta.grad min/max: [{grad.min().item():.6f}, "
          f"{grad.max().item():.6f}]")
    print(f"  All finite: {torch.all(torch.isfinite(grad)).item()}")

    # --- Finite-difference check on a few random components ---------------
    # Never request more components than theta has.
    n_fd = max(0, min(n_fd, n_test))
    if seed is None:
        indices = torch.randperm(n_test)[:n_fd]
    else:
        # A local generator keeps the choice reproducible without touching
        # the global RNG state.
        indices = torch.randperm(
            n_test, generator=torch.Generator().manual_seed(seed)
        )[:n_fd]
    grad_ad = grad.flatten()[indices].detach().clone()
    grad_fd = torch.zeros(n_fd)

    for i, idx in enumerate(indices):
        loss_plus = _perturbed_loss(idx, eps)
        loss_minus = _perturbed_loss(idx, -eps)
        grad_fd[i] = (loss_plus - loss_minus) / (2 * eps)

    print(f"\n  Finite-difference check ({n_fd} components, eps={eps}):")
    print(f"  {'Idx':>5s}  {'Autodiff':>12s}  {'Fin.Diff.':>12s}  {'Rel.Err':>12s}")
    for i in range(n_fd):
        rel_err = abs(grad_ad[i] - grad_fd[i]) / (abs(grad_ad[i]) + 1e-10)
        print(f"  {indices[i].item():5d}  {grad_ad[i].item():12.6f}  "
              f"{grad_fd[i].item():12.6f}  {rel_err.item():12.6e}")

# Run the gradient check once on a small 20-step problem; it prints its results.
test_gradient_flow_zoh3(n_test=20)

In [None]:
loss_methods_zoh_3 = ["exact_zoh_integrated"]

# The pickle names are the same whether the results are trained now or reloaded.
_pkl_sol = pickle_name("sol_zoh_3", n_zoh3, n)
_pkl_hist = pickle_name("history_zoh_3", n_zoh3, n)

_rm = get_method_run_mode(run_mode, "zoh3", run_overrides)
if _rm == RunMode.DISPLAY:
    # DISPLAY mode: no training, reuse the pickled results stored in out_dir.
    sol_zoh_3 = load_pickle(out_dir, _pkl_sol)
    history_zoh_3 = load_pickle(out_dir, _pkl_hist)
else:
    n_epochs = get_n_epochs(_rm, "zoh3", n_epochs_override)
    method = "exact_zoh_integrated"
    history_zoh_3, sol_zoh_3 = [], {}

    # Problem data as float32 tensors.
    A_torch, B_torch, Q_torch, R_torch, s0_torch = (
        torch.tensor(arr, dtype=torch.float32) for arr in (A, B, Q, R, s0)
    )

    # theta is the only trainable quantity; theta_2_dt maps it to the
    # per-interval values consumed by zoh_cost_matrices below.
    theta = torch.nn.Parameter(torch.ones(n_zoh3, 1))
    optim = torch.optim.Adam([theta], lr=1e-2)

    # Of the eight objects returned, only the second (the layer) is needed here.
    _, layer_3, _, _, _, _, _, _ = create_exact_zoh_cost_clqr(
        n_zoh3, s0, n_s, n_u, u_max,
    )

    with tqdm(total=n_epochs) as pbar:
        for epoch in range(n_epochs):
            pbar.update(1)
            optim.zero_grad(set_to_none=True)

            dts_torch = theta_2_dt(theta, T, n_zoh3)

            # (Ad, Bd, W) for every interval, computed outside the QP.
            zoh_mats = [
                zoh_cost_matrices(
                    dts_torch[k], A_torch, B_torch, Q_torch, R_torch,
                )
                for k in range(n_zoh3)
            ]
            Ad_list = [mats[0] for mats in zoh_mats]
            Bd_list = [mats[1] for mats in zoh_mats]
            W_list = [mats[2] for mats in zoh_mats]

            # W = L L^T, so z^T W z = ||L^T z||^2; the layer receives L^T
            # split column-wise into its state block and its input block.
            chol_T_list = [torch.linalg.cholesky(W_k).T for W_k in W_list]
            Lx_list = [LT_k[:, :n_s] for LT_k in chol_T_list]
            Lu_list = [LT_k[:, n_s:] for LT_k in chol_T_list]

            sol_zoh_3[method] = layer_3(*Ad_list, *Bd_list, *Lx_list, *Lu_list)
            sol_now = sol_zoh_3[method]

            # Training loss: sum of z_k^T W_k z_k along the QP solution. As
            # indexed here, sol[k - 1] is the state at step k >= 1 and
            # sol[n_zoh3 + k] is the input at step k; step 0 starts from s0.
            loss = torch.tensor(0.0, dtype=torch.float32)
            for k, W_k in enumerate(W_list):
                s_k = s0_torch if k == 0 else sol_now[k - 1]
                z_k = torch.cat([s_k, sol_now[n_zoh3 + k]])
                loss = loss + z_k @ W_k @ z_k

            loss.backward()
            optim.step()

            # dts_torch was computed before the optimizer step, so the logged
            # grid is the one that produced this epoch's loss.
            history_zoh_3.append({
                "method": method,
                "epoch": epoch,
                "loss": float(loss.item()),
                "dts": dts_torch.detach().cpu().numpy(),
            })

    save_pickle(out_dir, _pkl_sol, sol_zoh_3)
    save_pickle(out_dir, _pkl_hist, history_zoh_3)

In [None]:
# Plot and save the training results of every ZOH3 loss method.
for method in loss_methods_zoh_3:
    # Keep only the history records that belong to this method.
    hist_m = list(filter(lambda rec: rec['method'] == method, history_zoh_3))
    sol_m = sol_zoh_3[method]

    plot_training_res(sol_m, hist_m, n_zoh3, sol_method=2)
    save_training_res("out", method + "_zoh3", sol_m, hist_m, n_zoh3, sol_method=2)

## Sampling Density and Trajectory Change Analysis

### Collect All Methods

In [None]:
# label -> {'sol', 'history', 'sol_method', 'n'} for every collected method
method_solutions = {}

# One row per method family:
# (label prefix, method names, solutions keyed by method, history records,
#  sol_method flag, number of timesteps)
method_configs = [
    ("Aux", loss_methods, sol_aux, history_aux, 1, n),
    ("Rep", loss_methods_2, sol_rep, history_rep, 2, n),
    ("HS", loss_methods_hs, sol_hs, history_hs, 2, n),
    ("ZOH", loss_methods_zoh, sol_zoh, history_zoh, 2, n),
    ("ZOH2", loss_methods_zoh_2, sol_zoh_2, history_zoh_2, 2, n),
    ("ZOH3", loss_methods_zoh_3, sol_zoh_3, history_zoh_3, 2, n_zoh3),
]

for prefix, methods, sol_dict, history_list, sol_method, n_method in method_configs:
    try:
        for method in methods:
            # Only families run with a non-default timestep count show it in the label.
            if n_method != n:
                label = f"{prefix} (n={n_method}): {method}"
            else:
                label = f"{prefix}: {method}"
            # Entries are inserted one at a time, so methods collected before
            # a failure stay in method_solutions.
            method_solutions[label] = dict(
                sol=sol_dict[method],
                history=[h for h in history_list if h['method'] == method],
                sol_method=sol_method,
                n=n_method,
            )
    except Exception as e:
        # Best effort: report the family that failed and carry on with the rest.
        print(f"Could not load {prefix} methods: {e}")

print(f"Loaded {len(method_solutions)} methods: {list(method_solutions)}")

### Sampling Density vs Trajectory Changes

In [None]:
n_methods = len(method_solutions)
n_rows = int(np.ceil(n_methods / 2))    # two panels per row

if n_methods == 0:
    # A grid with zero rows cannot be created, and there is nothing to draw.
    print("No methods loaded: skipping the sampling-density plots.")
else:
    fig, axs = plt.subplots(n_rows, 2, figsize=(10, 2.5 * n_rows), squeeze=False)

    for i, (key, ms) in enumerate(method_solutions.items()):
        n_m = ms['n']
        data = extract_trajectory_data(ms, n_m)
        metrics = compute_trajectory_metrics(data, n_m, T)
        plot_density_and_changes(data, metrics, key, colors, axes=axs[i // 2, i % 2])

    # Drop the unused panel when the number of methods is odd. The start index
    # is n_methods rather than the leaked loop variable i, so this does not
    # depend on the plotting loop having run.
    for j in range(n_methods, n_rows * 2):
        fig.delaxes(axs[j // 2, j % 2])

### Cross-Correlation (Time-Lagged)

Cross correlation of the Sampling Density (SD) with:
- $\| \Delta u \|$
- $\| \Delta s \|_2$
- $\| u \|$
- $\| s \|_2$

On the y-axis, the cross-correlation factor.

The box indicates the maximum CC and the time lag at which it happens.

The dotted lines at $\pm 1.96 / \sqrt{n} \approx \pm 0.155$ (for $n=160$) mark the 95% confidence bounds. Values outside these bounds indicate a statistically significant correlation.

- **lag = 0**: Instantaneous correlation
- **lag > 0**: Does high sampling density *precede* large changes?
- **lag < 0**: Does high sampling density *follow* large changes?

In [None]:
# Plot the time-lagged cross-correlations for every collected method.
for key in method_solutions:
    ms = method_solutions[key]
    n_m = ms['n']    # each method carries its own number of timesteps

    data = extract_trajectory_data(ms, n_m)
    metrics = compute_trajectory_metrics(data, n_m, T)
    plot_cross_correlations(data, metrics, key, colors, max_lag=30)