In [1]:
# Numerical stack.
import math
import numpy as np
# Presumably project-local helper modules -- verify where they live.
from diffusions_maps import diffusion_map
from noise_dim import add_noise_dim
# Plotting and scikit-learn utilities (toy data sets, 3-D axes, PCA).
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll
from sklearn.datasets import make_s_curve
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.dpi'] = 50 # For memory saving purposes



# datafold: pcfold supplies PCManifold and GaussianKernel, dynfold supplies
# DiffusionMaps, as used by the benchmark cells below.
import datafold.dynfold as dfold
import datafold.pcfold as pfold
from datafold.dynfold import LocalRegressionSelection
from datafold.utils.plot import plot_pairwise_eigenvector

## 1000 datapoints

In [2]:
# Benchmark input: a noise-free S-curve point cloud with 1000 samples.
nr_samples = 1000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [3]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.041111998254934876, cut-off=0.8702361717373521


In [4]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

225 ms ± 9.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 2000 datapoints

In [5]:
# Benchmark input: a noise-free S-curve point cloud with 2000 samples.
nr_samples = 2000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [6]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.012289096715144032, cut-off=0.4757872709743525


In [7]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

376 ms ± 36.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 3000 datapoints

In [8]:
# Benchmark input: a noise-free S-curve point cloud with 3000 samples.
nr_samples = 3000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [9]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.009053107494079438, cut-off=0.40836797485738274


In [10]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

654 ms ± 36.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 4000 datapoints

In [11]:
# Benchmark input: a noise-free S-curve point cloud with 4000 samples.
nr_samples = 4000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [12]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.006550598625146456, cut-off=0.34737081909048745


In [13]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

1.17 s ± 27.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 5000 datapoints

In [14]:
# Benchmark input: a noise-free S-curve point cloud with 5000 samples.
nr_samples = 5000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [15]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.00460892289380875, cut-off=0.2913751828834159


In [16]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

1.51 s ± 32.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 6000 datapoints

In [17]:
# Benchmark input: a noise-free S-curve point cloud with 6000 samples.
nr_samples = 6000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [18]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.004909227629905164, cut-off=0.3007179989123913


In [19]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

2.34 s ± 60.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 7000 datapoints

In [20]:
# Benchmark input: a noise-free S-curve point cloud with 7000 samples.
nr_samples = 7000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [21]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0036204058946027453, cut-off=0.2582447311137256


In [22]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

2.9 s ± 89.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 8000 datapoints

In [23]:
# Benchmark input: a noise-free S-curve point cloud with 8000 samples.
nr_samples = 8000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [24]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.002773105378465487, cut-off=0.2260143553981692


In [25]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

3.71 s ± 69.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 9000 datapoints

In [26]:
# Benchmark input: a noise-free S-curve point cloud with 9000 samples.
nr_samples = 9000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [27]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0034616037819408807, cut-off=0.25251752044005


In [28]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

4.54 s ± 205 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 10000 datapoints

In [29]:
# Benchmark input: a noise-free S-curve point cloud with 10000 samples.
nr_samples = 10000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [30]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0027650652665002734, cut-off=0.22568647391988983


In [31]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

5.75 s ± 207 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 11000 datapoints

In [32]:
# Benchmark input: a noise-free S-curve point cloud with 11000 samples.
nr_samples = 11000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [33]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0021587106793084765, cut-off=0.19941143458212718


In [34]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

6.35 s ± 192 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 12000 datapoints

In [35]:
# Benchmark input: a noise-free S-curve point cloud with 12000 samples.
nr_samples = 12000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [36]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0023821700942910293, cut-off=0.20947838739289062


In [37]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

8.07 s ± 203 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 13000 datapoints

In [38]:
# Benchmark input: a noise-free S-curve point cloud with 13000 samples.
nr_samples = 13000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [39]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0016827086933251963, cut-off=0.1760586255337031


In [40]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

8.44 s ± 222 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 14000 datapoints

In [41]:
# Benchmark input: a noise-free S-curve point cloud with 14000 samples.
nr_samples = 14000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [42]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0015671897401275045, cut-off=0.16990792173435126


In [43]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

9 s ± 504 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 15000 datapoints

In [44]:
# Benchmark input: a noise-free S-curve point cloud with 15000 samples.
nr_samples = 15000
# The fixed random_state keeps the generated points identical across runs.
X, X_color = make_s_curve(n_samples=nr_samples, noise=0, random_state=3)
# Shuffled ordering of every sample index. It is drawn from NumPy's global RNG,
# which the cells shown here never seed, so the ordering varies between runs.
idx_plot = np.random.permutation(nr_samples)[:nr_samples]

In [45]:
# Wrap the samples in a PCManifold and estimate the Gaussian kernel scale
# (epsilon) and the distance cut-off from the data.
X_pcm = pfold.PCManifold(X)
# Seed the estimation: optimize_parameters works on a random subsample, so
# without a fixed random_state the reported values change from run to run.
X_pcm.optimize_parameters(random_state=3)

print(f"epsilon={X_pcm.kernel.epsilon}, cut-off={X_pcm.cut_off}")

epsilon=0.0018332392635542376, cut-off=0.18376483668322122


In [46]:
dmap = dfold.DiffusionMaps(
    kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
    n_eigenpairs=9,
    dist_kwargs=dict(cut_off=X_pcm.cut_off),
)
%timeit dmap1 = dmap.fit(X_pcm)

12.1 s ± 316 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
