In [1]:
import math
import numpy as np
from diffusions_maps import diffusion_map
from noise_dim import add_noise_dim
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll
from sklearn.datasets import make_s_curve
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
%matplotlib inline
plt.rcParams['figure.dpi'] = 50 # For memory saving purposes



import datafold.dynfold as dfold
import datafold.pcfold as pfold
from datafold.dynfold import LocalRegressionSelection
from datafold.utils.plot import plot_pairwise_eigenvector

## 1 noise dimension

In [2]:
nr_samples = 5000
# All sample indices in random order. A seeded Generator keeps the permutation
# identical after a kernel restart (the global np.random state is never seeded
# in this notebook).
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Noise-free swiss roll; the second return value is kept as X_color
# (presumably used to colour plots -- confirm against the plotting cells).
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [3]:
# Add 1 noise dimension (see the section heading) to the noise-free swiss roll.
# The kind of noise is decided inside the project helper noise_dim.add_noise_dim.
X = add_noise_dim(X_wo_n, 1)

In [4]:
# Wrap the noisy point cloud X in a datafold PCManifold.
X_pcm = pfold.PCManifold(X)
# Called with defaults; the print below reads kernel.epsilon and cut_off from
# X_pcm afterwards, so presumably this call is what sets them.
# NOTE(review): no seed is passed -- if the estimate subsamples X randomly, the
# printed values will differ between runs; confirm and consider a fixed seed.
X_pcm.optimize_parameters()

# Show the two values that the DiffusionMaps cell reuses.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.6061576354216172, cut-off=3.3415320262733803


In [5]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)


1.67 s ± 64.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 2 noise dimensions

In [6]:
nr_samples = 5000
# Random ordering of every sample index, drawn from a seeded Generator so that
# Restart & Run All reproduces the same permutation.
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Regenerate the noise-free swiss roll (same random_state as the other sections,
# so the base data is identical); X_color is the second return value.
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [7]:
# Add 2 noise dimensions (see the section heading) to the noise-free swiss roll.
# How the noise is generated is defined by the project helper add_noise_dim.
X = add_noise_dim(X_wo_n, 2)

In [8]:
# Build a datafold PCManifold from the swiss roll with 2 noise dimensions.
X_pcm = pfold.PCManifold(X)
# Default arguments; kernel.epsilon and cut_off are read from X_pcm right after,
# so presumably they are set by this call.
# NOTE(review): unseeded call -- if the estimate uses a random subsample the
# reported numbers are not reproducible; confirm against the datafold docs.
X_pcm.optimize_parameters()

# Report the parameters that are passed on to DiffusionMaps in the next cell.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.5991858620897456, cut-off=3.3222599946188853


In [9]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

1.69 s ± 85.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 3 noise dimensions

In [10]:
nr_samples = 5000
# Shuffled sample indices from a seeded Generator: reproducible, unlike a draw
# from the unseeded global np.random state.
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Noise-free swiss roll with a fixed random_state; the second return value is
# stored as X_color.
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [11]:
# Add 3 noise dimensions (see the section heading) to the noise-free swiss roll.
# The noise itself comes from the project helper add_noise_dim.
X = add_noise_dim(X_wo_n, 3)

In [12]:
# Point-cloud manifold over the swiss roll with 3 noise dimensions.
X_pcm = pfold.PCManifold(X)
# Run with defaults; the values printed below are read from X_pcm after this
# call, so presumably it is what sets them.
# NOTE(review): no seed is given -- verify whether the estimate is stochastic;
# if so, fix a seed so the printed values are stable across runs.
X_pcm.optimize_parameters()

# Print the kernel bandwidth and cut-off consumed by the next cell.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=1.0133635150771994, cut-off=4.320514528248511


In [13]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)


2.28 s ± 45.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 5 noise dimensions

In [14]:
nr_samples = 5000
# Permutation of all sample indices; the Generator is seeded so that the order
# does not change between kernel restarts.
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Base data: swiss roll without noise, fixed random_state. X_color is the
# second value returned by make_swiss_roll.
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [15]:
# Add 5 noise dimensions (see the section heading) to the noise-free swiss roll.
# The noise model lives in the project helper add_noise_dim.
X = add_noise_dim(X_wo_n, 5)

In [16]:
# Wrap the swiss roll with 5 noise dimensions in a datafold PCManifold.
X_pcm = pfold.PCManifold(X)
# Defaults only; kernel.epsilon and cut_off are read from X_pcm below, so this
# call presumably sets both.
# NOTE(review): the call is unseeded -- if the estimate draws a random
# subsample, re-running will print different values; confirm and seed it.
X_pcm.optimize_parameters()

# Display the parameters that feed the DiffusionMaps constructor.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.6861069406325341, cut-off=3.5550748106898946


In [17]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)


1.97 s ± 38.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 10 noise dimensions

In [18]:
nr_samples = 5000
# Every index 0..nr_samples-1 in shuffled order, from a seeded Generator so the
# result is reproducible.
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Swiss roll without noise (fixed random_state); the second return value is
# kept as X_color.
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [19]:
# Add 10 noise dimensions (see the section heading) to the noise-free swiss roll.
# The details of the noise are defined by the project helper add_noise_dim.
X = add_noise_dim(X_wo_n, 10)

In [20]:
# PCManifold over the swiss roll with 10 noise dimensions.
X_pcm = pfold.PCManifold(X)
# Invoked with default arguments; the attributes printed below are read from
# X_pcm afterwards, so presumably this call sets them.
# NOTE(review): no seed here -- check whether the estimate is randomised; a
# fixed seed would make the printed numbers repeatable.
X_pcm.optimize_parameters()

# Print epsilon and cut-off, both of which are reused by the next cell.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.8331958965213554, cut-off=3.9176569026640222


In [21]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)


2.35 s ± 79.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 20 noise dimensions

In [22]:
nr_samples = 5000
# Seeded shuffle of all sample indices, so a fresh kernel gives the same order
# (the global np.random state is unseeded in this notebook).
idx_plot = np.random.default_rng(3).permutation(nr_samples)
# Noise-free swiss roll generated with a fixed random_state; X_color receives
# the second return value.
X_wo_n, X_color = make_swiss_roll(n_samples=nr_samples, noise=0, random_state=3)

In [23]:
# Add 20 noise dimensions (see the section heading) to the noise-free swiss roll.
# What the noise looks like is determined by the project helper add_noise_dim.
X = add_noise_dim(X_wo_n, 20)

In [24]:
# Wrap the swiss roll with 20 noise dimensions in a datafold PCManifold.
X_pcm = pfold.PCManifold(X)
# Default arguments; since kernel.epsilon and cut_off are read from X_pcm in
# the print below, this call presumably sets them.
# NOTE(review): called without a seed -- if the estimate is based on a random
# subsample the output is not reproducible; confirm and pass a fixed seed.
X_pcm.optimize_parameters()

# Output the two parameters handed to DiffusionMaps in the following cell.
print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=1.3643703470158928, cut-off=5.013245513526668


In [25]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)


3.89 s ± 85.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
