 ------This Jupyter notebook is authored by J. Antonio Sidaoui, jas2545@columbia.edu------

# Example 3.3

In [None]:
## Exact sampling
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Set seed for reproducibility
np.random.seed(1906974547)

# Parameters
n_samples = 500
dt = 0.01
T = n_samples * dt  # nominal horizon; the last sample sits at (n_samples - 1) * dt
# Sample times t_k = k * dt, so the grid spacing matches the simulation step.
# (np.linspace(0, T, n_samples) would give a step of T / (n_samples - 1) != dt.)
time = np.arange(n_samples) * dt
window_size = 50

# OU parameters
theta_dim = 2
theta = np.zeros((n_samples, theta_dim))

# Initial condition: draw from N(0, 1) in every component
theta[0] = np.random.normal(0, 1, theta_dim)

# Exact OU sampling: each component of theta follows dθ = -θ dt + √2 dW,
# whose one-step transition is N(e^{-dt} θ, 1 - e^{-2 dt}).
# Both transition constants are loop-invariant, so compute them once.
decay = np.exp(-dt)
noise_std = np.sqrt(1 - np.exp(-2 * dt))
for t in range(1, n_samples):
    theta[t] = np.random.normal(decay * theta[t - 1], noise_std)

# Observables: X = θ₁² + θ₂² and Y = θ₁
X = np.sum(theta**2, axis=1)
Y = theta[:, 0].copy()

# Stack data matrix (X, Y)
data = np.column_stack((X, Y))

# Mean-center the data (the scaler keeps the column means in scaler.mean_)
scaler = preprocessing.StandardScaler(with_std=False).fit(data)
data = scaler.transform(data)

# Function to compute windowed covariance
def windowed_covariance(data, center, window_size):
    """Sample covariance of the rows of `data` in a window around `center`.

    The window spans rows ``center - window_size // 2`` through
    ``center + window_size // 2`` (inclusive), clipped to the bounds of
    `data`, so it holds at most ``2 * (window_size // 2) + 1`` rows.

    Parameters
    ----------
    data : array with a ``.T`` attribute; rows are observations.
    center : int, index of the row the window is centered on.
    window_size : int, nominal window width.

    Returns
    -------
    The result of ``np.cov`` applied to the transposed window, i.e. with
    the columns of `data` treated as the variables.
    """
    radius = window_size // 2

    # Clip the lower edge at the first row.
    lo = center - radius
    if lo < 0:
        lo = 0

    # Clip the (exclusive) upper edge at the number of rows.
    hi = center + radius + 1
    n_rows = len(data)
    if hi > n_rows:
        hi = n_rows

    samples = data[lo:hi]
    return np.cov(samples.T)

# Local covariance estimate around every sample: one 2x2 matrix per time index,
# stacked into an array of shape (n_samples, 2, 2).
C_matrices = np.array(
    [windowed_covariance(data, s, window_size) for s in range(n_samples)],
    dtype=float,
).reshape(n_samples, 2, 2)

In [None]:
from numpy.linalg import inv, LinAlgError

# Invert every local covariance exactly once. (Inverting C_matrices[s] inside
# the pairwise loop repeats the same inversion O(n_samples^2) times.)
C_inv = np.empty_like(C_matrices)
for t in range(n_samples):
    try:
        C_inv[t] = inv(C_matrices[t])
    except LinAlgError:
        C_inv[t] = np.linalg.pinv(C_matrices[t])  # fallback if not invertible

# Pairwise modified Mahalanobis distances:
#   d(t, s)^2 = 0.5 * delta^T (C_t^{-1} + C_s^{-1}) delta,  delta = x_t - x_s
delta = data[:, None, :] - data[None, :, :]  # delta[t, s] = x_t - x_s

# quad[t, s] = delta[t, s]^T C_t^{-1} delta[t, s]
quad = np.einsum('tsi,tij,tsj->ts', delta, C_inv, delta)

# The C_s^{-1} term is quad[s, t] because the quadratic form ignores the sign
# of delta; summing with the transpose also makes the result exactly symmetric.
squared_distance = 0.5 * (quad + quad.T)

# Clamp tiny negative values caused by floating-point rounding before the sqrt.
distance_matrix = np.sqrt(np.maximum(squared_distance, 0.0))

# visualize the distance matrix
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(6, 5))
image = ax.imshow(distance_matrix, cmap='viridis', aspect='auto')
fig.colorbar(image, ax=ax, label='Modified Mahalanobis Distance')
ax.set_title('Pairwise Distances $d(x(t), x(s))$')
ax.set_xlabel('s')
ax.set_ylabel('t')
plt.tight_layout()
plt.show()

In [None]:
## Eigenfunction Recovery

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from scipy.special import factorial

# Construct kernel matrix from the pairwise distances, using the median
# distance as the Gaussian bandwidth.
# NOTE(review): compute_affinity_matrix and diff_map_info are not defined or
# imported anywhere in the visible notebook — confirm they are provided
# (e.g. by a helper module or an earlier cell), otherwise Restart & Run All fails here.
W = compute_affinity_matrix(distance_matrix, kernel_type='gaussian', sigma=np.median(distance_matrix))

# Compute diffusion map eigenvectors and eigenvalues
# NOTE(review): the cells below treat diff_vec[:, 0] as phi_1 — confirm how
# diff_map_info orders its eigenvectors and whether it drops the trivial one.
diff_vec, diff_eig = diff_map_info(W)  # Columns of diff_vec are eigenvectors

# Estimate generator spectrum eigenvalues as log(eigenvalue) / epsilon
# NOTE(review): epsilon is set to the same median used for sigma above; whether
# that is the right time scale depends on how the helper parameterizes the
# kernel (sigma vs. sigma**2) — TODO confirm.
epsilon = np.median(distance_matrix)
generator_eig_est = np.log(diff_eig) / epsilon

# Compute ground truth Hermite polynomials h_{i,j}(theta_t)
from numpy.polynomial.hermite import hermval

def normalized_hermite(k, x):
    """Evaluate the normalized probabilists' Hermite polynomial He_k(x) / sqrt(k!).

    The probabilists' polynomials He_k are the ones orthogonal under the
    standard normal weight, with squared norm k!, so dividing by sqrt(k!)
    gives an orthonormal family. They are the eigenfunctions of the OU
    process dθ = -θ dt + √2 dW simulated in this notebook. The physicists'
    polynomials H_k (``numpy.polynomial.hermite.hermval``) are orthogonal
    under exp(-x²) instead and do not match the sqrt(k!) normalization.

    Parameters
    ----------
    k : int, non-negative polynomial degree.
    x : scalar or array of evaluation points.

    Returns
    -------
    He_k(x) / sqrt(k!), with the same shape as `x`.
    """
    # Local import: only the physicists' `hermval` is imported at cell level.
    from numpy.polynomial.hermite_e import hermeval

    coeffs = np.zeros(k + 1)
    coeffs[k] = 1  # select the single basis polynomial He_k
    return hermeval(x, coeffs) / np.sqrt(factorial(k))

max_order = 4

# Every index pair (i, j) with i + j ≤ max_order, enumerated i-major.
index_pairs = [
    (i, j)
    for i in range(max_order + 1)
    for j in range(max_order + 1 - i)
]
ij_labels = [f"h_{i},{j}" for i, j in index_pairs]

# Tensor-product basis h_{i,j}(theta) = h_i(theta_1) * h_j(theta_2),
# evaluated along the trajectory; shape: (num_basis, n_samples)
h_basis = np.array([
    normalized_hermite(i, theta[:, 0]) * normalized_hermite(j, theta[:, 1])
    for i, j in index_pairs
])

# Absolute Pearson correlation between each diffusion eigenvector (rows of C)
# and each Hermite basis function (columns of C).
num_diff_eigs = min(len(diff_vec.T), len(h_basis))
C = np.zeros((num_diff_eigs, len(h_basis)))

for k, eigvec in enumerate(diff_vec.T[:num_diff_eigs]):
    for l, basis_fn in enumerate(h_basis):
        corr, _ = pearsonr(eigvec, basis_fn)
        C[k, l] = abs(corr)

# Plot heatmap of the absolute correlations. Column 0 (h_0,0) is skipped: it is
# the first basis function built, the product of two degree-0 polynomials,
# i.e. a constant.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    C[:, 1:],
    xticklabels=ij_labels[1:],
    # Raw f-string: avoids the invalid "\p" escape, and the braces keep
    # multi-digit indices (phi_10, ...) entirely inside the subscript.
    yticklabels=[fr"$\phi_{{{k+1}}}$" for k in range(num_diff_eigs)],
    cmap='viridis',
    annot=True,
    fmt=".2f",
    cbar_kws={'label': 'Correlation'},
    ax=ax,
)
ax.set_title("Correlation between Diffusion Map Eigenvectors and Hermite Eigenfunctions")
ax.set_xlabel("Ground Truth Hermite Eigenfunctions $h_{i,j}$")
ax.set_ylabel("Diffusion Coordinates $\\phi_k$")
plt.tight_layout()
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Extract phi_1 and h_{1,0}
phi_1 = diff_vec[:, 0].reshape(-1, 1)
# Look the basis function up by its label instead of a hard-coded position,
# so the selection stays correct if max_order (and hence the ordering) changes.
h_10 = h_basis[ij_labels.index("h_1,0")]

# Fit a pure rescaling through the origin: h_10 ≈ a * phi_1 (no intercept)
reg = LinearRegression(fit_intercept=False).fit(phi_1, h_10)
phi_1_scaled = reg.predict(phi_1)

# Plot both time series on a common axis
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(h_10, label=r"$h_{1,0}$", lw=2)
ax.plot(phi_1_scaled, label=r"scaled $\phi_1$", linestyle='--', lw=2)
ax.set_title(r"Time Series Comparison: $h_{1,0}$ vs. scaled $\phi_1$")
ax.set_xlabel("Time Index")
ax.grid(True)
ax.legend()
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Choose phi_k and its best-matched Hermite function h_{i,j}
# NOTE(review): both indices are picked by hand — confirm against the
# correlation heatmap that this pair is actually the best match.
phi_idx = 2
hermite_idx = 8

# Recover i, j from label
ij_str = ij_labels[hermite_idx].replace("h_", "")
i, j = map(int, ij_str.split(","))

# Create a grid for smooth surface plotting
n_grid = 100  # controls resolution
x_min, x_max = theta[:, 0].min(), theta[:, 0].max()
y_min, y_max = theta[:, 1].min(), theta[:, 1].max()
x_grid = np.linspace(x_min, x_max, n_grid)
y_grid = np.linspace(y_min, y_max, n_grid)
# Dedicated grid names: reusing X, Y here would overwrite the observable
# time series X, Y created in the simulation cell.
theta1_grid, theta2_grid = np.meshgrid(x_grid, y_grid)

# Evaluate Hermite function on the grid
Z = normalized_hermite(i, theta1_grid) * normalized_hermite(j, theta2_grid)

# Rescale the *selected* diffusion coordinate onto the *selected* Hermite
# function with a least-squares fit through the origin:
#   scale = <phi_k, h_ij> / <phi_k, phi_k>
# (Reusing the earlier phi_1 -> h_{1,0} regression would plot the wrong
# coordinate under this figure's labels.)
phi_k = diff_vec[:, phi_idx]
h_target = h_basis[hermite_idx]
scale = (phi_k @ h_target) / (phi_k @ phi_k)
phi_scaled = scale * phi_k

# Plot
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# Smooth Hermite surface
surf = ax.plot_surface(theta1_grid, theta2_grid, Z, cmap=cm.viridis, alpha=0.8, linewidth=0, antialiased=True)

# Scatter original data points with scaled phi
ax.scatter(theta[:, 0], theta[:, 1], phi_scaled, c='r', s=10, label=fr'Scaled $\phi_{{{phi_idx+1}}}$')

ax.set_xlabel(r'$\theta_1$')
ax.set_ylabel(r'$\theta_2$')
ax.set_zlabel(fr'$h_{{{i},{j}}}(\theta)$')
ax.set_title(fr"Hermite $h_{{{i},{j}}}$ vs. Scaled Diffusion Coordinate $\phi_{{{phi_idx+1}}}$")
ax.legend()
ax.view_init(elev=20, azim=-130)
plt.tight_layout()
plt.show()

In [None]:
## Lifting Error

k = 10  # or choose based on eigenvalue decay
Phi_k = diff_vec[:, :k]  # first k diffusion eigenvectors, one per column

# Lifting coefficients: inner product of each centered observable column
# with every retained eigenvector.
# NOTE(review): this is a plain projection; it equals the least-squares fit
# only if the columns of Phi_k are orthonormal — confirm for diff_map_info.
H_X, H_Y = (data[:, column] @ Phi_k for column in (0, 1))

# Lifted (approximated) observables: expand the coefficients back in the
# eigenvector basis, giving one value per sample.
X_lifted, Y_lifted = (Phi_k @ coeffs.T for coeffs in (H_X, H_Y))

In [None]:
import matplotlib.pyplot as plt

# Side-by-side comparison of each centered observable with its lifted
# reconstruction. One loop body replaces the two copy-pasted subplot sections.
fig, axes = plt.subplots(1, 2, figsize=(14, 4))

panels = (
    ("X", data[:, 0], X_lifted),
    ("Y", data[:, 1], Y_lifted),
)

for ax, (name, original, lifted) in zip(axes, panels):
    ax.plot(original, label=f"Original ${name}_t$", lw=2)
    # Raw f-string: "\h" is not a valid escape sequence in a normal string.
    ax.plot(lifted, '--', label=fr"Lifted $\hat{{{name}}}_t$", lw=2)
    ax.set_title(f"Original vs. Lifted ${name}_t$")
    ax.grid(True)
    ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Plot reconstruction against true X surface
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Create a uniform grid over the domain of theta
n_grid = 100
x_min, x_max = theta[:, 0].min(), theta[:, 0].max()
y_min, y_max = theta[:, 1].min(), theta[:, 1].max()
x_grid = np.linspace(x_min, x_max, n_grid)
y_grid = np.linspace(y_min, y_max, n_grid)
Xg, Yg = np.meshgrid(x_grid, y_grid)

# True surfaces for X = θ₁² + θ₂² and Y = θ₁ on the grid, shifted by the same
# column means that were removed from `data`. The lifted values reconstruct
# the mean-centered observables, so an uncentered surface would sit offset
# from every scatter point and stem.
x_mean, y_mean = scaler.mean_
Z_X_true = Xg**2 + Yg**2 - x_mean
Z_Y_true = Xg - y_mean

# True (mean-centered) observable values at the sampled theta locations
X_true = data[:, 0]
Y_true = data[:, 1]

# One panel per observable:
# (name, true surface, true values, lifted values, surface alpha, stem width)
panels = (
    ("X", Z_X_true, X_true, X_lifted, 0.7, 0.5),
    ("Y", Z_Y_true, Y_true, Y_lifted, 0.6, 0.9),
)

fig = plt.figure(figsize=(12, 5))

for position, (name, surface, true_vals, lifted_vals, alpha, stem_width) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, position, projection='3d')
    ax.plot_surface(Xg, Yg, surface, cmap='viridis', alpha=alpha, linewidth=0, antialiased=True)

    # Scatter lifted values
    lifted_flat = lifted_vals.T.flatten()
    ax.scatter(theta[:, 0], theta[:, 1], lifted_flat, c='r', s=10, label=f'Lifted ${name}$')

    # Stem lines from the true value to the lifted value at every sample
    for x, y, z_true, z_lift in zip(theta[:, 0], theta[:, 1], true_vals, lifted_flat):
        ax.plot([x, x], [y, y], [z_true, z_lift], color='blue', linewidth=stem_width)

    ax.set_xlabel(r'$\theta_1$')
    ax.set_ylabel(r'$\theta_2$')
    ax.set_zlabel(f'${name}$')
    # Raw f-string: in a normal string "\t" in "\theta" is a TAB character.
    ax.set_title(fr"True ${name}(\theta)$ Surface vs. Lifted Reconstruction")
    ax.legend()

plt.tight_layout()
plt.show()