In [None]:
from src.data.data import *
from src.embedor import *
from src.plotting import *
import pandas as pd
# from sklearn_extra.cluster import KMedoids
from sklearn.cluster import HDBSCAN
import seaborn as sns
sns.set_theme()
%load_ext autoreload

In [None]:
# concentric circles
%autoreload 2
# Dataset parameters for the two-ring toy problem; `labels` is the
# ground-truth ring membership used later to score the clusterings.
n_points, noise, noise_thresh = 2500, 0.1, None
return_dict = concentric_circles(
    n_points=n_points,
    noise=noise,
    noise_thresh=noise_thresh,
    factor=0.4,
)
labels = return_dict['cluster']

In [None]:
%autoreload 2
# Fit EmbedOR on the point cloud. The result of fit_transform is kept in `emb`,
# and the fitted object itself is reused by later cells, which read
# `embedor.G` (graph) and `embedor.apsp_energy` (pairwise matrix).
# NOTE(review): EmbedOR() runs with its default arguments here; the class is
# defined outside this notebook — confirm the defaults suit this dataset.
embedor = EmbedOR()
emb = embedor.fit_transform(return_dict['data'])

In [None]:
# Isomap-style baseline: take the dense adjacency matrix of EmbedOR's graph
# (edge attribute 'weight') and run weighted, undirected all-pairs shortest paths.
# NOTE(review): assumes 'weight' holds Euclidean edge lengths, as the `_euc`
# names suggest — confirm against EmbedOR.
# NOTE(review): `nx` and `scipy` are not imported explicitly in this notebook;
# presumably they arrive through the star imports in the first cell — verify.
A_euc = nx.to_numpy_array(embedor.G, weight='weight')
apsp_euc = scipy.sparse.csgraph.shortest_path(A_euc, unweighted=False, directed=False)

In [None]:
# diffusion distance
from pydiffmap import diffusion_map as dm
# dmap = dm.DiffusionMap.from_sklearn(n_evecs=10)
def diffusion_transition_matrix(W, epsilon):
    """
    Compute the transition matrix for diffusion maps from a weighted adjacency matrix.

    The input is symmetrized, every existing edge weight ``w`` (entry > 0) is
    mapped to the Gaussian affinity ``exp(-w**2 / epsilon)``, non-edges stay at
    zero, and each row is then normalized to sum to one.

    Parameters:
        W (numpy.ndarray): Weighted adjacency matrix (NxN). Entries are treated
            as edge lengths; entries that are <= 0 after symmetrization mean
            "no edge". The caller's array is not modified.
        epsilon (float): Bandwidth of the Gaussian kernel; must be positive.

    Returns:
        numpy.ndarray: Row-stochastic transition matrix P = D^(-1) K (NxN),
        where K is the affinity matrix and D its row sums.

    Raises:
        ValueError: If W is not a square 2-D matrix, if epsilon is not
            positive, or if some row has zero total affinity (an isolated
            node, or affinities that underflowed to zero), in which case the
            row cannot be normalized.
    """
    W = np.asarray(W, dtype=float)
    if W.ndim != 2 or W.shape[0] != W.shape[1]:
        raise ValueError(f"W must be a square 2-D matrix, got shape {W.shape}.")
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}.")

    # symmetrize the matrix (creates a new array, the input stays untouched)
    W = (W + W.T) / 2
    edge_mask = W > 0

    # convert edge lengths to affinities; non-edges keep affinity zero
    K = np.where(edge_mask, np.exp(-W**2 / epsilon), 0.0)

    degree = K.sum(axis=1)
    if not np.all(degree > 0):
        # Dividing by a zero degree would silently fill the row with NaN.
        bad = np.flatnonzero(~(degree > 0))
        raise ValueError(
            f"{bad.size} row(s) have zero total affinity (first index: {bad[0]}); "
            "the graph has isolated nodes or epsilon is too small."
        )

    # Row-normalize by broadcasting instead of building a dense diagonal
    # matrix and paying for an O(N^3) matrix product.
    return K / degree[:, np.newaxis]

# One-step random-walk kernel on the graph; sanity-check that it is row-stochastic.
transition_mat = diffusion_transition_matrix(A_euc, epsilon=0.8)
assert np.allclose(transition_mat.sum(axis=1), 1), "Transition matrix rows do not sum to 1."

# Diffusion times and the corresponding t-step kernels P^t.
ts = [10, 20, 40]
transition_mat_list = [np.linalg.matrix_power(transition_mat, t) for t in ts]

from sklearn.metrics import pairwise_distances
# pairwise_distances compares ROWS, i.e. the t-step transition distributions
# of two start nodes; the result is squared, so diff_dists holds squared distances.
diff_dists = [pairwise_distances(P_t, P_t)**2 for P_t in transition_mat_list]

In [None]:
# Cluster the same points under each notion of distance: raw ambient
# coordinates first, then the precomputed pairwise matrices.
dbscan_ambient = HDBSCAN().fit(return_dict['data'])
# copy=True: with metric='precomputed' the default copy=False allows HDBSCAN to
# overwrite the distance matrix it is given in place. These matrices
# (embedor.apsp_energy, apsp_euc, diff_dists) are read again by later cells,
# so they must survive the fit unchanged.
dbscan_energy = HDBSCAN(metric='precomputed', copy=True).fit(embedor.apsp_energy)
dbscan_euc = HDBSCAN(metric='precomputed', copy=True).fit(apsp_euc)
dbscan_diff = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[0])
dbscan_diff2 = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[1])
dbscan_diff3 = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[2])

In [None]:
# Score every clustering against the ground-truth labels with the
# adjusted Rand index (ARI).
# NOTE(review): HDBSCAN marks noise points with label -1, which ARI treats
# as one more cluster — confirm that is the intended scoring.
from sklearn.metrics import adjusted_rand_score

(ari_ambient, ari_energy, ari_euc,
 ari_diff, ari_diff2, ari_diff3) = [
    adjusted_rand_score(labels, fitted.labels_)
    for fitted in (dbscan_ambient, dbscan_energy, dbscan_euc,
                   dbscan_diff, dbscan_diff2, dbscan_diff3)
]

print(f'Adjusted Rand Index (ambient): {ari_ambient}')
print(f'Adjusted Rand Index (energy): {ari_energy}')
print(f'Adjusted Rand Index (euclidean): {ari_euc}')
print(f'Adjusted Rand Index (diffusion, t={ts[0]}): {ari_diff}')
print(f'Adjusted Rand Index (diffusion, t={ts[1]}): {ari_diff2}')
print(f'Adjusted Rand Index (diffusion, t={ts[2]}): {ari_diff3}')


In [None]:
# Show EmbedOR's spectral-initialisation plot for the model fitted above.
# NOTE(review): plot_spectral_init is defined outside this notebook — confirm
# what it draws and whether it returns a value that gets echoed as cell output.
embedor.plot_spectral_init()

In [None]:
def energy_to_affinity(energy, sigma):
    """
    Map distances/energies to Gaussian affinities.

    Parameters:
        energy: Scalar or array of distance-like values.
        sigma: Scale factor multiplying the squared energy in the exponent.

    Returns:
        exp(-sigma * energy**2), evaluated element-wise.
    """
    squared = energy ** 2
    return np.exp(-sigma * squared)

# Turn each pairwise matrix into Gaussian affinities exp(-sigma * d**2) with
# sigma = 1 and draw it as a 10x10-inch heatmap (no colorbar, no tick labels).

# EmbedOR energy matrix; work on a copy of the attribute.
apsp_energy = embedor.apsp_energy.copy()
apsp_affinities = energy_to_affinity(apsp_energy, 1)
plt.figure(figsize=(10, 10))
sns.heatmap(apsp_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Graph shortest-path distances (Isomap-style baseline).
euc_affinities = energy_to_affinity(apsp_euc, 1)
# plot heatmap
plt.figure(figsize=(10, 10))
sns.heatmap(euc_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[0].
# Note: diff_dists entries were already squared when they were built, and
# energy_to_affinity squares its input again.
diff_affinities = energy_to_affinity(diff_dists[0], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[1].
diff_affinities2 = energy_to_affinity(diff_dists[1], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities2, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[2].
diff_affinities3 = energy_to_affinity(diff_dists[2], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities3, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

In [None]:
%autoreload 2
# Torus dataset (double=True) with ground-truth cluster labels.
n_points = 3000
noise = 0.5
noise_thresh = None
return_dict = torus(n_points=n_points, noise=noise, noise_thresh=noise_thresh, supersample=False, double=True)

# Reorder the samples so that points with the same label are contiguous
# (presumably so the pairwise-matrix heatmaps drawn later show per-cluster blocks).
# Both 'data' and 'cluster' are reordered together: permuting only 'data'
# would leave return_dict['cluster'] pointing at the wrong rows.
# NOTE(review): any other per-sample arrays in return_dict are NOT reordered — verify
# none are used after this cell.
sort_idx = np.argsort(return_dict['cluster'])
return_dict['data'] = return_dict['data'][sort_idx]
return_dict['cluster'] = return_dict['cluster'][sort_idx]
labels = return_dict['cluster']

In [None]:
%autoreload 2
# Re-fit EmbedOR on the (label-sorted) torus points. This rebinds `embedor`
# and `emb`, so the cells below read the torus graph `embedor.G` and the
# torus matrix `embedor.apsp_energy`, not the concentric-circles ones.
# NOTE(review): EmbedOR() runs with its default arguments here; the class is
# defined outside this notebook — confirm the defaults suit this dataset.
embedor = EmbedOR()
emb = embedor.fit_transform(return_dict['data'])

In [None]:
# Isomap-style baseline for the torus: dense adjacency matrix of EmbedOR's graph
# (edge attribute 'weight'), then weighted, undirected all-pairs shortest paths.
# NOTE(review): assumes 'weight' holds Euclidean edge lengths, as the `_euc`
# names suggest — confirm against EmbedOR.
# NOTE(review): `nx` and `scipy` are not imported explicitly in this notebook;
# presumably they arrive through the star imports in the first cell — verify.
A_euc = nx.to_numpy_array(embedor.G, weight='weight')
apsp_euc = scipy.sparse.csgraph.shortest_path(A_euc, unweighted=False, directed=False)

In [None]:

# One-step random-walk kernel on the torus graph; sanity-check that it is row-stochastic.
transition_mat = diffusion_transition_matrix(A_euc, epsilon=0.8)
assert np.allclose(transition_mat.sum(axis=1), 1), "Transition matrix rows do not sum to 1."

# Diffusion times and the corresponding t-step kernels P^t.
ts = [10, 20, 40]
transition_mat_list = [np.linalg.matrix_power(transition_mat, t) for t in ts]

from sklearn.metrics import pairwise_distances
# pairwise_distances compares ROWS, i.e. the t-step transition distributions
# of two start nodes; the result is squared, so diff_dists holds squared distances.
diff_dists = [pairwise_distances(P_t, P_t)**2 for P_t in transition_mat_list]

In [None]:
# Cluster the torus points under each notion of distance: raw ambient
# coordinates first, then the precomputed pairwise matrices.
dbscan_ambient = HDBSCAN().fit(return_dict['data'])
# copy=True: with metric='precomputed' the default copy=False allows HDBSCAN to
# overwrite the distance matrix it is given in place. These matrices
# (embedor.apsp_energy, apsp_euc, diff_dists) are read again by later cells,
# so they must survive the fit unchanged.
dbscan_energy = HDBSCAN(metric='precomputed', copy=True).fit(embedor.apsp_energy)
dbscan_euc = HDBSCAN(metric='precomputed', copy=True).fit(apsp_euc)
dbscan_diff = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[0])
dbscan_diff2 = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[1])
dbscan_diff3 = HDBSCAN(metric='precomputed', copy=True).fit(diff_dists[2])

In [None]:
# Score every torus clustering against the ground-truth labels with the
# adjusted Rand index (ARI).
# NOTE(review): HDBSCAN marks noise points with label -1, which ARI treats
# as one more cluster — confirm that is the intended scoring.
from sklearn.metrics import adjusted_rand_score

(ari_ambient, ari_energy, ari_euc,
 ari_diff, ari_diff2, ari_diff3) = [
    adjusted_rand_score(labels, fitted.labels_)
    for fitted in (dbscan_ambient, dbscan_energy, dbscan_euc,
                   dbscan_diff, dbscan_diff2, dbscan_diff3)
]

print(f'Adjusted Rand Index (ambient): {ari_ambient}')
print(f'Adjusted Rand Index (energy): {ari_energy}')
print(f'Adjusted Rand Index (euclidean): {ari_euc}')
print(f'Adjusted Rand Index (diffusion, t={ts[0]}): {ari_diff}')
print(f'Adjusted Rand Index (diffusion, t={ts[1]}): {ari_diff2}')
print(f'Adjusted Rand Index (diffusion, t={ts[2]}): {ari_diff3}')


In [None]:
# Show EmbedOR's spectral-initialisation plot for the torus model fitted above.
# NOTE(review): plot_spectral_init is defined outside this notebook — confirm
# what it draws and whether it returns a value that gets echoed as cell output.
embedor.plot_spectral_init()

In [None]:
def energy_to_affinity(energy, sigma):
    """
    Map distances/energies to Gaussian affinities.

    NOTE(review): a function with this name and the same body is already
    defined in an earlier cell of this notebook; consider keeping a single
    definition.

    Parameters:
        energy: Scalar or array of distance-like values.
        sigma: Scale factor multiplying the squared energy in the exponent.

    Returns:
        exp(-sigma * energy**2), evaluated element-wise.
    """
    squared = energy ** 2
    return np.exp(-sigma * squared)

# Turn each torus pairwise matrix into Gaussian affinities exp(-sigma * d**2)
# and draw it as a 10x10-inch heatmap (no colorbar, no tick labels).

# EmbedOR energy matrix; work on a copy of the attribute.
apsp_energy = embedor.apsp_energy.copy()
apsp_affinities = energy_to_affinity(apsp_energy, 1)
plt.figure(figsize=(10, 10))
sns.heatmap(apsp_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Graph shortest-path distances (Isomap-style baseline). Uses sigma = 0.01 here,
# unlike the sigma = 1 used for every other matrix in this cell.
# NOTE(review): presumably chosen because these distances are on a larger scale — confirm.
euc_affinities = energy_to_affinity(apsp_euc, 0.01)
# plot heatmap
plt.figure(figsize=(10, 10))
sns.heatmap(euc_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[0].
# Note: diff_dists entries were already squared when they were built, and
# energy_to_affinity squares its input again.
diff_affinities = energy_to_affinity(diff_dists[0], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[1].
diff_affinities2 = energy_to_affinity(diff_dists[1], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities2, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)

# Diffusion distances at t = ts[2].
diff_affinities3 = energy_to_affinity(diff_dists[2], 1)
plt.figure(figsize=(10, 10))
sns.heatmap(diff_affinities3, cmap='viridis', cbar=False, xticklabels=False, yticklabels=False)