In [None]:
from src.data.data import *
from src.orcml import *
from src.plotting import *
from src.utils.graph_utils import *
from src.isorc import *
from src.utils.embeddings import *
from sklearn.manifold import TSNE
import umap
%load_ext autoreload

# Shared ORCManL hyperparameters; passed as `exp_params` to every ORCManL(...)
# constructed in this notebook.
exp_params = dict(
    mode='nbrs',
    n_neighbors=15,
    epsilon=None,
    lda=0.01,
    delta=0.8,
)

In [None]:
# Sampling settings for the 3-D swiss roll.
n_points, noise, noise_thresh = 2500, 6.2, 2.2

# Metadata describing this dataset (not referenced again in the visible cells).
dataset_info = {
    'name': '3D_swiss_roll',
    'n_points': n_points,
    'noise': noise,
    'noise_thresh': noise_thresh
}

# swiss_roll returns a dict; the entries used later are unpacked into
# individual names in one step.
return_dict = swiss_roll(
    n_points=n_points,
    noise=noise,
    noise_thresh=noise_thresh,
    supersample=True,
    dim=3,
    hole=False,
)
swiss_roll_data, color, cluster, swiss_roll_supersample, subsample_indices = (
    return_dict[key]
    for key in ('data', 'color', 'cluster', 'data_supersample', 'subsample_indices')
)

In [None]:
%autoreload 2
# Build ORCManL from the shared exp_params and fit it on the sampled swiss-roll
# points stored under return_dict['data'].
orcmanl = ORCManL(exp_params=exp_params, verbose=True, reattach=True, nbrhood_size=1)
orcmanl.fit(return_dict['data'])

In [None]:
# Take a copy of the fitted graph; later cells add a 'kernel_distance'
# attribute to the edges of G_orc.
# NOTE(review): presumably the copy keeps orcmanl.G itself unmodified --
# confirm against the copy() semantics of the graph type.
G_orc = orcmanl.G.copy()

def kernel_distances(k_ij, d_ij, tau):
    """
    Distance of a single edge under the logarithmic barrier kernel.

    Evaluates ``-log(k_ij + 2) / tau + (d_ij * tau + log(3)) / tau``.
    For ``k_ij == 1`` the two logarithm terms cancel and the result is
    ``d_ij`` (up to rounding); as ``k_ij`` approaches -2 the result grows
    without bound.

    Parameters
    ----------
    k_ij : value used as the edge's curvature (callers in this notebook pass
        the 'ricciCurvature' edge attribute).
    d_ij : value used as the edge's length (callers pass the 'weight' edge
        attribute).
    tau : scale parameter dividing both terms.

    Returns
    -------
    The kernel distance, computed with numpy so array inputs broadcast.
    """
    curvature_term = -1/tau * np.log(k_ij + 2)
    length_term = (d_ij * tau + np.log(3))/tau
    return curvature_term + length_term

def compute_kernel_distances(G, tau):
    """
    Annotate every edge of a graph with its logarithmic barrier kernel distance.

    For each edge (u, v) the 'weight' and 'ricciCurvature' attributes are read,
    kernel_distances(...) is evaluated, and the result is stored on the edge
    under 'kernel_distance'. G is modified in place.

    Parameters
    ----------
    G : graph exposing ``edges()`` and ``G[u][v]`` attribute access.
    tau : scale parameter forwarded to kernel_distances.

    Returns
    -------
    (G, kdists, orcs) : the same graph object, the list of kernel distances and
        the list of curvature values, both in ``G.edges()`` order.
    """
    kdists, orcs = [], []
    for u, v in G.edges():
        edge_attrs = G[u][v]
        length = edge_attrs['weight']
        curvature = edge_attrs['ricciCurvature']
        kdist = kernel_distances(curvature, length, tau)
        edge_attrs['kernel_distance'] = kdist
        kdists.append(kdist)
        orcs.append(curvature)
    return G, kdists, orcs

In [None]:
# Attach kernel distances (tau = 0.1) to the edges of G_orc and keep the
# per-edge kernel distances and curvatures as lists.
G_orc, kdists, orcs = compute_kernel_distances(G_orc, tau=0.1)

In [None]:
# Shortest paths are selected under the 'kernel_distance' edge attribute, but
# the length recorded for each pair is the sum of the original 'weight'
# attributes along the selected path.
# all_pairs_dijkstra_path yields exactly one shortest path per (source, target),
# which is all this cell consumes; enumerating every tied shortest path with
# all_pairs_all_shortest_paths only to keep the first one is wasted work.
paths = nx.all_pairs_dijkstra_path(G_orc, weight='kernel_distance')

# NOTE(review): node labels are used directly as matrix indices, which assumes
# the nodes are the integers 0..n-1 -- confirm against how ORCManL builds G.
# Pairs with no connecting path keep the initial value 0.
n_nodes = len(G_orc.nodes)
sp_dist_matrix = np.zeros((n_nodes, n_nodes))
for u, u_paths in tqdm(paths, total=n_nodes):
    for v, uv_path in u_paths.items():
        if u == v:
            continue
        pathlen = 0
        for a, b in zip(uv_path[:-1], uv_path[1:]):
            dist = G_orc[a][b]['weight']
            assert dist > 0, "Distances should be positive"
            pathlen += dist
        # Both triangles are written; the later visit of (v, u) overwrites both
        # entries again, so the two entries of a pair always hold the same value.
        sp_dist_matrix[u, v] = pathlen
        sp_dist_matrix[v, u] = pathlen

In [None]:
# Print the non-zero entries of the shortest-path distance matrix as a flat array.
print(sp_dist_matrix[np.nonzero(sp_dist_matrix)])

In [None]:
from sklearn.manifold import Isomap

# Embed into 2-D with Isomap, handing it sp_dist_matrix as a precomputed
# distance matrix, then plot the embedding coloured by `color`.
isomap = Isomap(
    metric='precomputed',
    n_components=2,
)
emb = isomap.fit_transform(sp_dist_matrix)

plot_data_2D(emb, title=None, color=color)

In [None]:
%autoreload 2
import gc

# Collect unreachable Python objects first so that any GPU tensors they still
# hold are handed back to PyTorch's caching allocator, and only then release
# the cached blocks. Emptying the cache before collecting would leave that
# memory sitting in the cache.
gc.collect()
torch.cuda.empty_cache()

# Fit a 2-D ISORC embedding from the fitted ORCManL object.
isorc = ISORC(orcmanl, dim=2, init='spectral')
X_opt, losses = isorc.fit_graph(lr=0.5, n_iter=500)

# Plot the returned losses against their index.
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()

In [None]:
# Fetch the shortcut-edge selection stored on the fitted ORCManL object.
# NOTE(review): presumably integer positions into the edge ordering of
# orcmanl.G -- confirm in ORCManL; it is used below as a numpy index.
shortcut_edges_indices = orcmanl.shortcut_edges
# Build a 0/1 indicator with one entry per edge of orcmanl.G: 1 at the selected
# positions, 0 elsewhere.
shortcut_edges = np.zeros(len(orcmanl.G.edges))
shortcut_edges[shortcut_edges_indices] = 1

In [None]:
# Draw orcmanl.G at the ISORC coordinates X_opt; node colours come from the
# dataset's 'color' values and edge colours from the shortcut indicator.
plot_graph_2D(
    X_opt,
    orcmanl.G,
    title=None,
    node_color=return_dict['color'][orcmanl.G.nodes()],
    edge_width=0.25,
    edge_color=shortcut_edges,
)

In [None]:
# Quadratics dataset: sampling settings, then generation.
n_points, noise, noise_thresh = 2000, 0.20, 0.45

return_dict = quadratics(
    n_points=n_points,
    noise=noise,
    noise_thresh=noise_thresh,
    supersample=True,
)

In [None]:
# Re-create ORCManL with the shared exp_params and fit it on the quadratics
# points stored under return_dict['data'].
orcmanl = ORCManL(exp_params=exp_params, verbose=True, reattach=True)
orcmanl.fit(return_dict['data'])

In [None]:
# Draw the pruned graph at the original data coordinates.
plot_graph_2D(
    return_dict['data'],
    orcmanl.G_pruned,
    title=None,
)

In [None]:
%autoreload 2
import gc

# Collect unreachable Python objects first so that any GPU tensors they still
# hold are handed back to PyTorch's caching allocator, and only then release
# the cached blocks. Emptying the cache before collecting would leave that
# memory sitting in the cache.
gc.collect()
torch.cuda.empty_cache()

# Fit a 2-D ISORC embedding from the fitted ORCManL object.
isorc = ISORC(orcmanl, dim=2, init='spectral')
X_opt, losses = isorc.fit_graph(lr=0.2, n_iter=500)

# Plot the returned losses against their index.
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()

In [None]:
%autoreload 2
# Draw orcmanl.G at the ISORC coordinates X_opt, colouring nodes by the
# dataset's 'cluster' values.
plot_graph_2D(
    X_opt,
    orcmanl.G,
    title=None,
    node_color=return_dict['cluster'][orcmanl.G.nodes()],
)

In [None]:
# Quadratics again. Relative to the earlier quadratics run only noise_thresh
# changes (0.45 -> 0.5); noise itself stays at 0.20.
n_points, noise, noise_thresh = 2000, 0.20, 0.5

return_dict = quadratics(
    n_points=n_points,
    noise=noise,
    noise_thresh=noise_thresh,
    supersample=True,
)

In [None]:
# Re-create ORCManL with the shared exp_params and fit it on the regenerated
# quadratics points stored under return_dict['data'].
orcmanl = ORCManL(exp_params=exp_params, verbose=True, reattach=True)
orcmanl.fit(return_dict['data'])

In [None]:
# Draw the pruned graph at the original data coordinates.
plot_graph_2D(
    return_dict['data'],
    orcmanl.G_pruned,
    title=None,
)

In [None]:
%autoreload 2
import gc

# Collect unreachable Python objects first so that any GPU tensors they still
# hold are handed back to PyTorch's caching allocator, and only then release
# the cached blocks. Emptying the cache before collecting would leave that
# memory sitting in the cache.
gc.collect()
torch.cuda.empty_cache()

# Fit a 2-D ISORC embedding, this time with weight_orc=True and p=10.
isorc = ISORC(orcmanl, dim=2, init='spectral')
X_opt, losses = isorc.fit_graph(lr=0.2, n_iter=500, weight_orc=True, p=10)

# Plot the returned losses against their index.
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.show()

In [None]:
%autoreload 2
# Draw orcmanl.G at the ISORC coordinates X_opt; nodes are coloured by the
# dataset's 'cluster' values and edges by orcmanl.orcs.
plot_graph_2D(
    X_opt,
    orcmanl.G,
    title=None,
    node_color=return_dict['cluster'][orcmanl.G.nodes()],
    edge_color=orcmanl.orcs,
)

In [None]:
# Print the non-zero entries of isorc.W.
# NOTE(review): presumably the weights used by the preceding fit_graph call
# (weight_orc=True) -- confirm in ISORC.
print(isorc.W[isorc.W != 0])