In [None]:
import numpy as np
import scipy
import matplotlib.pyplot as plt

import datafold.pcfold as pfold
import datafold.dynfold as dfold

from datafold.pcfold.kernels import MahalanobisKernel

def dmap(data, cov_matrices, n_evecs=10, verbosity=2, k=25, random_state=1):
    """Fit diffusion maps on ``data`` with a Mahalanobis kernel.

    A ``PCManifold`` is built from ``data`` and its parameters are optimized;
    the resulting ``cut_off`` is forwarded to the distance computation of the
    ``DiffusionMaps`` model.

    Parameters
    ----------
    data
        Point cloud handed to ``pfold.PCManifold``.
    cov_matrices
        Per-point matrices forwarded unchanged to ``MahalanobisKernel``.
        NOTE(review): the caller in this notebook passes (pseudo-)inverted
        covariance matrices -- confirm this is what the kernel expects.
    n_evecs
        Number of eigenpairs requested from ``DiffusionMaps``.
    verbosity
        Forwarded to the kernel as ``verbosity_level``.
    k
        Used as ``k`` in ``optimize_parameters`` and as ``kmin`` in the
        distance keyword arguments.
    random_state
        Seed forwarded to ``optimize_parameters``.

    Returns
    -------
    tuple
        ``(eigenvectors_, eigenvalues_)`` of the fitted model.
    """
    manifold = pfold.PCManifold(data)
    manifold.optimize_parameters(random_state=random_state, k=k, result_scaling=1)

    # epsilon is deliberately passed as None here.
    # NOTE(review): presumably the kernel then determines its own scale -- confirm.
    mahalanobis_kernel = MahalanobisKernel(
        verbosity_level=verbosity,
        epsilon=None,
        cov_matrices=cov_matrices,
    )

    distance_options = {"cut_off": manifold.cut_off, "kmin": k}
    model = dfold.DiffusionMaps(
        n_eigenpairs=n_evecs,
        kernel=mahalanobis_kernel,
        dist_kwargs=distance_options,
    )
    model.fit(manifold)

    return model.eigenvectors_, model.eigenvalues_

# Create example data

In [None]:
# Seed shared by every random generator created in this notebook.
random_state=1
# Number of sample points drawn for the example data set.
n_pts = 1000
# Number of perturbed samples drawn around each point to estimate its covariance.
n_neighbors = 100
# Standard deviation of the normal perturbation used for the neighborhoods.
eps_covariance = 1e-2
# Relative cut-off (`rcond`) used when pseudo-inverting the covariance matrices.
pinv_rcond = 1e-4

# Generator used for both the data and the neighborhood sampling below.
rng=np.random.default_rng(random_state)

def transformation(x):
    """Apply the nonlinear observation map to 2-D points.

    Each row ``(a, b)`` of ``x`` is mapped to ``(a + b**3, b - a**3)``.

    Parameters
    ----------
    x
        Array indexed as ``x[:, 0]`` and ``x[:, 1]``; one point per row.

    Returns
    -------
    numpy.ndarray
        Array with two columns holding the transformed coordinates.
    """
    first = x[:, 0]
    second = x[:, 1]

    mapped_first = first + second**3
    mapped_second = second - first**3
    return np.column_stack((mapped_first, mapped_second))

# Latent samples in the unit square and their nonlinear observations.
data_x = rng.uniform(low=0, high=1, size=(n_pts,2))
data_y = transformation(data_x)

# Sample covariance data from neighborhoods.
# The kernel is later applied to the observations `data_y`, so the local
# covariances have to be estimated in observation space as well: perturb each
# latent point, push the perturbed points through `transformation`, and only
# then compute the covariance. (Taking the covariance of the latent neighbors
# directly yields roughly eps_covariance**2 * I, which carries no information
# about the transformation.)
covariances = np.zeros((n_pts, 2, 2))
for k in range(n_pts):
    neighbors_x = rng.normal(loc=data_x[k, :], scale=eps_covariance, size=(n_neighbors, 2))
    neighbors_y = transformation(neighbors_x)
    # np.cov expects variables in rows, hence the transpose.
    covariances[k,:,:] = np.cov(neighbors_y.T)

In [None]:
# compute the pseudo inverses of the covariances and pass them to the metric

covariances_inv = np.zeros_like(covariances)

# Absolute eigenvalue threshold used only for the rank diagnostics below.
pinv_tol = np.exp(-10)

some_evals1 = []
some_ranks1 = []

# `time` is also used by later cells for timing.
from time import time
t0 = time()
print("Computing %g inverse matrices..." % (n_pts), end="")
for k in range(n_pts):
    # Invert the sampled covariance itself (not the zero-initialized output
    # buffer, whose pseudo-inverse would be all zeros).
    covariances_inv[k,:,:] = np.linalg.pinv(covariances[k,:,:], rcond=pinv_rcond)
    # Collect diagnostics for at most the first 1000 matrices.
    if k < 1000:
        # Covariance matrices are symmetric, so eigvalsh returns real eigenvalues.
        evals = np.linalg.eigvalsh(covariances[k,:,:])
        some_evals1.append(evals)
        # Numerical rank: number of eigenvalues above the tolerance.
        some_ranks1.append(np.sum(evals > pinv_tol))
print(f"done in {time()-t0} seconds.")

# np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
some_evals1 = np.vstack(some_evals1)
some_ranks1 = np.vstack(some_ranks1)

In [None]:
# Two diagnostics side by side: eigenvalue spectrum (left) and rank counts (right).
fig, ax = plt.subplots(1, 2, figsize=(8, 4))
ax_evals, ax_ranks = ax

# Left panel: histogram of log|eigenvalue| with the rank tolerance as a red line.
log_abs_evals = np.log(np.abs(some_evals1.ravel()))
log_tol = np.log(pinv_tol)
ax_evals.hist(log_abs_evals, 150)
ax_evals.plot([log_tol, log_tol], [0, 300], 'r-')
ax_evals.set_title('covariance eigenvalue distribution')
ax_evals.set_xlabel(r'log $\lambda$')

# Right panel: histogram of the rank counts on slightly shifted half-integer bins.
rank_bins = np.arange(0,10)-.5
ax_ranks.hist(some_ranks1, rank_bins-.05, alpha=.5)
ax_ranks.set_xlim([0,5])
ax_ranks.set_title('covariance rank distribution')
ax_ranks.set_xlabel('rank')

In [None]:
# compute DMAPS with the given mahalanobis metric
# Number of eigenpairs requested from the diffusion map.
n_evecs = 10

# Wall-clock start; `time` was imported in the pseudo-inverse cell.
t0=time()

# Embed the observed data, passing the (pseudo-)inverted covariances to the kernel.
evecs1,evals1 = dmap(data_y, cov_matrices=covariances_inv, n_evecs=n_evecs)

print(f"First dmap done in {time()-t0} seconds")

In [None]:
# Fresh generator so the plotted subsample does not depend on earlier draws.
rng = np.random.default_rng(random_state)

# Pick eigenvectors via local regression on the computed eigenvectors.
lrs = dfold.LocalRegressionSelection(intrinsic_dim=2, n_subsample=500, strategy="dim")
selection1 = lrs.fit(evecs1)

# Random subset of at most 2000 rows for the scatter plot.
n_rows = evecs1.shape[0]
idx_ev = rng.permutation(n_rows)[:2000]

first_evec = selection1.evec_indices_[0]
second_evec = selection1.evec_indices_[1]

fig, ax = plt.subplots(1, 2, figsize=(6, 3))

# Left: residuals reported by the selection.
ax[0].plot(selection1.residuals_, '.-')

# Right: the two selected eigenvectors, colored by the first latent coordinate.
ax[1].scatter(
    evecs1[idx_ev, first_evec],
    evecs1[idx_ev, second_evec],
    s=.5,
    c=data_x[idx_ev, 0],
)
ax[1].set_title('embedding network 1');
ax[1].set_xlabel(r'$\phi_1$')
ax[1].set_ylabel(r'$\phi_2$')

fig.tight_layout()