In [1]:
import numpy as np
import osmnx as ox
import networkx as nx

import seaborn as sns
import matplotlib.pyplot as plt

import random
import pickle, time
import pymde
from sklearn.manifold import MDS, Isomap, TSNE, LocallyLinearEmbedding, SpectralEmbedding
from scipy import sparse

import mlrfit as mf
import lrrouting as ldr

import cvxpy as cp
import numba as nb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed Python's `random` module and NumPy's legacy global generator with the
# same value so code drawing from either global generator is repeatable.
SEED = 1001
random.seed(SEED)
np.random.seed(SEED)

# Graph and matrix definition

In [3]:
# Embedding rank used by the single-rank fits and baked into `filename`.
rank = 6

# Label of the graph family; it is interpolated into `filename`, so it has to
# name the generator that is actually called below (a directed scale-free
# graph), not the Watts-Strogatz generator this cell used previously.
mtype = "scale_free"
n = 7020

# Alternative generator kept for reference (Watts-Strogatz small-world graph;
# switch mtype back to "small_world" if it is re-enabled):
# G = nx.connected_watts_strogatz_graph(n, k=4, p=0.1)
# G.remove_edges_from(nx.selfloop_edges(G))
# G = nx.DiGraph(G)

# scale_free_graph requires alpha + beta + gamma == 1, so alpha is derived
# from the two free parameters.
beta = 0.7
gamma = 0.01
# NOTE(review): scale_free_graph returns a MultiDiGraph that may contain
# parallel edges and self-loops; confirm that ldr.nx_graph_to_matrices treats
# them as intended (the Watts-Strogatz variant above collapsed to a DiGraph).
G = nx.scale_free_graph(n, alpha=1-beta-gamma, beta=beta, gamma=gamma)#alpha=0.41, beta=0.54)

n = G.number_of_nodes()
print(f"{n=}, {G.number_of_edges()=}")

# Optional random edge weights (disabled):
# for u, v in G.edges():
#     G[u][v]['weight'] = np.random.rand() * 10

# Build the adjacency and distance matrices and restrict the graph to the
# returned node set. `nodes_cc` is presumably the largest strongly connected
# component -- verify against ldr.nx_graph_to_matrices.
Adj, Dist, nodes_cc = ldr.nx_graph_to_matrices(G, nodes=True)
G = G.subgraph(nodes_cc)
n = G.number_of_nodes()
# `A` is another name for the same distance matrix object (no copy is made).
A = Dist

n=7020, G.number_of_edges()=23853
[572, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [4]:
# ldr.plot_nx_G(G, with_labels=False, node_size=1, f_layout=nx.spring_layout, width=0.05)

In [5]:
# Sanity check: the graph kept for the experiments must be strongly connected.
assert nx.is_strongly_connected(G)
# Density-normalised 5-bin histogram over every entry of the distance matrix
# (displayed as the cell's output).
np.histogram(Dist.ravel(), bins=5, density=True)

(array([0.00388619, 0.24021957, 0.22737664, 0.02706734, 0.00145025]),
 array([ 0.,  2.,  4.,  6.,  8., 10.]))

In [6]:
# Adjacency-list form of Adj; passed to the embedding-graph constructors below.
adjacency_list = ldr.adjacency_directed_list(Adj)

# Draw source/target pairs (1020 is presumably the number requested -- confirm
# against ldr.st_pairs) and keep at most the first 1000 of them.
sources, targets = ldr.st_pairs(n, Dist, 1020)
M = min(1000, sources.size)
sources, targets = sources[:M], targets[:M]

In [7]:
PSD = False

# Smallest strictly positive entry of A, and an iteration cap derived from the
# ratio of the largest entry to it (never more than 5000).
w_min = np.min(A[A > 0])
rt_max_iters = min(int(5 * np.max(A) / w_min), 10**4 // 2)

# Record whether A equals its transpose up to np.allclose tolerances.
symm = np.allclose(A, A.T)
print(f"{symm=}")

# Base name combining graph label, rank and node count.
filename = "%s_r%d_%d" % (mtype, rank, n)

symm=False


In [8]:
# Density-normalised 5-bin histogram of the strictly positive adjacency entries
# (displayed as the cell's output).
positive_entries = Adj[Adj > 0]
np.histogram(positive_entries, bins=5, density=True)

(array([1.22064092e-02, 9.26482673e-06, 0.00000000e+00, 4.63241337e-06,
        4.63241337e-06]),
 array([  1. ,  82.8, 164.6, 246.4, 328.2, 410. ]))

In [9]:
# Empty results container; none of the cells visible here write to it.
info = dict()

In [10]:
# Sample a fraction of the nodes; the same index set is used for both the
# sampled rows and the sampled columns of Dist.
fraction_of_nodes = 0.1
n_sampled = int(n * fraction_of_nodes)
pi_rows = np.random.permutation(n)[:n_sampled]
pi_cols = pi_rows  # alternative: an independent np.random.permutation(n)[:n_sampled]

pi_row_Dist = ldr.sparse_sampled_matrix(pi_rows, Dist, rows=True)
pi_col_Dist_T = ldr.sparse_sampled_matrix(pi_cols, Dist, rows=False)

# Dense references for the two sparse matrices: Dist with everything outside
# the sampled rows (resp. columns) set to zero.
row_reference = np.zeros((n, n))
row_reference[pi_rows] = Dist[pi_rows]
assert np.allclose(pi_row_Dist.toarray(), row_reference)

col_reference = np.zeros((n, n))
col_reference[:, pi_cols] = Dist[:, pi_cols]
assert np.allclose(pi_col_Dist_T.T.toarray(), col_reference)
print("PASSED")
del row_reference, col_reference

# Compact targets handed to the fits: the sampled rows of Dist, and the sampled
# columns of Dist transposed so that they are also stored row-wise.
rDist = Dist[pi_rows]
cDist = Dist[:, pi_cols].T

# Complements of the sampled index sets, in ascending order.
all_nodes = np.arange(n)
pi_rows_c = np.delete(all_nodes, pi_rows)
pi_cols_c = np.delete(all_nodes, pi_cols)

PASSED


In [11]:
print("\n\nSymmetric fit -> split -> asymmetric fit")
# Symmetric embedding of dimension rank//2, fitted with fast_cc on the sampled
# rows of Dist.
# NOTE(review): cg_eps=1e-20 is far below float64 machine epsilon (~2.2e-16);
# if it is a relative CG tolerance it can never be met -- confirm it is
# intentional (the rank sweeps further down use 1e-7).
X_symm, loss, losses = ldr.fast_cc(
    rank//2,
    pi_rows=pi_rows,
    pi_rows_c=pi_rows_c,
    rDist=rDist,  # alternative target: (rDist+cDist)/2
    symm=True,
    n_init=5,
    max_iter=1000,
    eps=1e-6,
    verbose=False,
    freq=500,
    cg_eps=1e-20,
    cg_max_iter=1000,
)
print(f"\n{rank/2=}, {loss=}")

# Build the routing graph from the embedding and evaluate it on the sampled
# source/target pairs; the ratios themselves are discarded.
l_dar = ldr.construct_node_embedding_graph(X_symm, adjacency_list)
_ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}



Symmetric fit -> split -> asymmetric fit


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
  rho0 = np.dot(r, z)
  losses[k] = np.sqrt(np.dot(r, r)) / b_norm



rank/2=3.0, loss=1.0430089654218087
median_stretch=300.0%, mean_stretch=594.5%
%[ratio<2] = 40.60%, %[ratio<1.2] = 24.90%, %[ratio=1.] = 24.20%


In [12]:
print("\n\nSymmetric fit -> split -> asymmetric fit")
# Same symmetric fit of dimension rank//2, this time with slow_cc (debug mode
# on, two initialisations).
X_symm, loss, losses = ldr.slow_cc(
    rank//2,
    pi_rows=pi_rows,
    rDist=rDist,  # alternative target: (rDist+cDist)/2
    symm=True,
    n_init=2,
    debug=True,
    max_iter=1000,
    eps=1e-6,
    verbose=False,
    freq=500,
)
print(f"\n{rank/2=}, {loss=}")

# Build the routing graph from the embedding and evaluate it on the sampled
# source/target pairs; the ratios themselves are discarded.
l_dar = ldr.construct_node_embedding_graph(X_symm, adjacency_list)
_ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}



Symmetric fit -> split -> asymmetric fit

rank/2=3.0, loss=0.9846596731259017
median_stretch=225.0%, mean_stretch=484.5%
%[ratio<2] = 48.90%, %[ratio<1.2] = 30.40%, %[ratio=1.] = 29.80%


In [13]:
# Warm start for the asymmetric fit: the symmetric embedding stacked on top of
# itself along the first axis (np.concatenate's default axis).
Z0 = np.concatenate((X_symm, X_symm))

In [14]:
# Split and fit the asymmetric embedding with slow_cc, warm-started from Z0.
Z, loss, losses = ldr.slow_cc(rank, pi_rows=pi_rows, pi_cols=pi_cols, rDist=rDist, cDist=cDist, 
                               symm=False, n_init=2, debug=True, Z0=Z0,
                               max_iter=1000, eps=1e-6, verbose=False, freq=500)
# This fit is run with `rank`, not rank//2, so the report labels it `rank`
# (previously it printed `rank/2`, mislabelling the result); this matches the
# report of the equivalent fast_cc fit.
print(f"\n{rank=}, {loss=}")
# The first n rows of Z are passed as the x-embeddings and the remaining rows
# as the y-embeddings of the routing graph.
l_dar = ldr.construct_xy_node_embedding_graph(Z[:n], Z[n:], adjacency_list)
# Evaluate on the sampled source/target pairs; the ratios are discarded.
_ = {'ratios' : ldr.subopt_ratios(l_dar, Dist, sources, targets)}


rank/2=3.0, loss=0.34452660751844494
median_stretch=133.3%, mean_stretch=480.5%
%[ratio<2] = 56.60%, %[ratio<1.2] = 47.50%, %[ratio=1.] = 47.10%


In [15]:
# Split and fit the asymmetric embedding with fast_cc.
# Warm start: the symmetric embedding stacked twice along the first axis.
Z0 = np.concatenate((X_symm, X_symm), axis=0)
assert not np.any(np.isnan(Z0))
# Alternative random start (disabled): Z2 = np.random.randn(2*n, rank//2)
Z, loss, losses = ldr.fast_cc(
    rank,
    pi_rows=pi_rows,
    pi_cols=pi_cols,
    pi_rows_c=pi_rows_c,
    pi_cols_c=pi_cols_c,
    rDist=rDist,
    cDist=cDist,
    Z0=Z0,
    n_init=1,
    max_iter=1000,
    eps=1e-10,
    verbose=False,
    freq=500,
    cg_eps=1e-20,
    cg_max_iter=1000,
)
print(f"\n{rank=}, {loss=}")
print(r"$\|x_i-y_j\|_2$")

# First n rows of Z serve as x-embeddings, the remaining rows as y-embeddings.
x_embedding, y_embedding = Z[:n], Z[n:]
l_dar = ldr.construct_xy_node_embedding_graph(x_embedding, y_embedding, adjacency_list)
_ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}


rank=6, loss=0.34452660751843034
$\|x_i-y_j\|_2$
median_stretch=133.3%, mean_stretch=480.7%
%[ratio<2] = 56.60%, %[ratio<1.2] = 47.50%, %[ratio=1.] = 47.10%


In [16]:
# Evaluate routing that uses only the first n rows of the asymmetric embedding
# (the x-half) as a single node embedding.
x_embedding = Z[:n]
l_dar = ldr.construct_node_embedding_graph(x_embedding, adjacency_list)
_ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}

median_stretch=333.3%, mean_stretch=578.4%
%[ratio<2] = 40.30%, %[ratio<1.2] = 26.20%, %[ratio=1.] = 25.50%


In [17]:
# Options shared by the fast_cc calls of this sweep.
solver_opts = dict(max_iter=1000, verbose=False, freq=500)
cg_opts = dict(cg_eps=1e-7, cg_max_iter=1000)

# NOTE(review): the loop variable reuses the notebook-level name `rank`, so
# `rank` is left at the last swept value once this cell finishes.
# NOTE(review): the recorded output shows losses[-1]=0.0 for some fits whose
# stretch is far from optimal -- confirm what the last entry of `losses` holds.
for rank in [4, 8, 16, 20]:
    print("\n\nSymmetric fit -> split -> asymmetric fit")
    # Stage 1: symmetric embedding of dimension rank//2, fitted to the average
    # of the sampled rows and the (transposed) sampled columns.
    Z_symm, loss, losses = ldr.fast_cc(
        rank//2,
        pi_rows=pi_rows,
        pi_rows_c=pi_rows_c,
        rDist=(rDist+cDist)/2,
        symm=True,
        n_init=5,
        eps=1e-6,
        **solver_opts,
    )
    print(f"\n{rank/2=}, {losses[0]=}, {losses[-1]=}")
    l_dar = ldr.construct_node_embedding_graph(Z_symm, adjacency_list)
    _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}

    # Stage 2: asymmetric fit warm-started from the symmetric embedding
    # stacked twice.
    Z0 = np.concatenate([Z_symm, Z_symm], axis=0)
    Z, loss, losses = ldr.fast_cc(
        rank, pi_rows, pi_cols, pi_rows_c, pi_cols_c, rDist, cDist,
        Z0=Z0, n_init=1, eps=1e-10, **solver_opts, **cg_opts,
    )
    print(f"\n{rank=}, {losses[0]=}, {losses[-1]=}")
    print(r"$\|x_i-y_j\|_2$")
    l_dar = ldr.construct_xy_node_embedding_graph(Z[:n], Z[n:], adjacency_list)
    _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}

    # Baseline: asymmetric fit without a warm start.
    print("\n\nAsymmetric fit")
    Z, loss, losses = ldr.fast_cc(
        rank, pi_rows, pi_cols, pi_rows_c, pi_cols_c, rDist, cDist,
        n_init=2, eps=1e-6, **solver_opts, **cg_opts,
    )
    print(f"\n{rank=}, {losses[0]=}, {losses[-1]=}")

    # Evaluate the x/y pairing first, then each half on its own, in that order.
    evaluations = (
        (r"$\|x_i-y_j\|_2$", ldr.construct_xy_node_embedding_graph, (Z[:n], Z[n:])),
        (r"$\|x_i-x_j\|_2$", ldr.construct_node_embedding_graph, (Z[:n],)),
        (r"$\|y_i-y_j\|_2$", ldr.construct_node_embedding_graph, (Z[n:],)),
    )
    for label, build_graph, embeddings in evaluations:
        print(label)
        l_dar = build_graph(*embeddings, adjacency_list)
        _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}



Symmetric fit -> split -> asymmetric fit

rank/2=2.0, losses[0]=9.715870736654784, losses[-1]=0.0
median_stretch=375.0%, mean_stretch=608.8%
%[ratio<2] = 32.40%, %[ratio<1.2] = 18.30%, %[ratio=1.] = 17.90%

rank=4, losses[0]=2.2439133292048843, losses[-1]=0.5449817057131584
$\|x_i-y_j\|_2$
median_stretch=200.0%, mean_stretch=530.1%
%[ratio<2] = 50.90%, %[ratio<1.2] = 32.10%, %[ratio=1.] = 30.30%


Asymmetric fit

rank=4, losses[0]=11.61309832219528, losses[-1]=0.0
$\|x_i-y_j\|_2$
median_stretch=129.2%, mean_stretch=370.2%
%[ratio<2] = 64.90%, %[ratio<1.2] = 46.40%, %[ratio=1.] = 45.10%
$\|x_i-x_j\|_2$
median_stretch=333.3%, mean_stretch=568.7%
%[ratio<2] = 38.00%, %[ratio<1.2] = 22.40%, %[ratio=1.] = 21.70%
$\|y_i-y_j\|_2$
median_stretch=300.0%, mean_stretch=545.2%
%[ratio<2] = 41.10%, %[ratio<1.2] = 23.40%, %[ratio=1.] = 22.70%


Symmetric fit -> split -> asymmetric fit

rank/2=4.0, losses[0]=5.181272940840443, losses[-1]=0.4051804087105434
median_stretch=310.0%, mean_stretch=525.7%

In [18]:
# Compare Dist with its transpose via mf.rel_diff (presumably a relative
# difference measure -- confirm against mlrfit). The recorded value (~0.44) is
# non-zero, in line with the symm=False reported for this matrix.
mf.rel_diff(Dist, Dist.T)

0.43620244217962734

In [19]:
# Options shared by the fast_cc calls of this sweep.
solver_opts = dict(max_iter=1000, verbose=False, freq=500)
cg_opts = dict(cg_eps=1e-7, cg_max_iter=1000)

# NOTE(review): the loop variable reuses the notebook-level name `rank`, so
# `rank` is left at the last swept value once this cell finishes.
# NOTE(review): the recorded output shows losses[-1]=0.0 for some fits whose
# stretch is far from optimal -- confirm what the last entry of `losses` holds.
for rank in [4, 8, 16, 20]:
    print("\n\nSymmetric fit -> split -> asymmetric fit")
    # Stage 1: symmetric embedding of dimension rank//2, fitted to the sampled
    # rows only (no averaging with the sampled columns).
    Z_symm, loss, losses = ldr.fast_cc(
        rank//2,
        pi_rows=pi_rows,
        pi_rows_c=pi_rows_c,
        rDist=rDist,
        symm=True,
        n_init=5,
        eps=1e-6,
        **solver_opts,
    )
    print(f"\n{rank/2=}, {losses[0]=}, {losses[-1]=}")
    l_dar = ldr.construct_node_embedding_graph(Z_symm, adjacency_list)
    _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}

    # Stage 2: asymmetric fit warm-started from the symmetric embedding
    # stacked twice.
    Z0 = np.concatenate([Z_symm, Z_symm], axis=0)
    Z, loss, losses = ldr.fast_cc(
        rank, pi_rows, pi_cols, pi_rows_c, pi_cols_c, rDist, cDist,
        Z0=Z0, n_init=2, eps=1e-6, **solver_opts, **cg_opts,
    )
    print(f"\n{rank=}, {losses[0]=}, {losses[-1]=}")
    print(r"$\|x_i-y_j\|_2$")
    l_dar = ldr.construct_xy_node_embedding_graph(Z[:n], Z[n:], adjacency_list)
    _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}

    # Baseline: asymmetric fit without a warm start.
    print("\n\nAsymmetric fit")
    Z, loss, losses = ldr.fast_cc(
        rank, pi_rows, pi_cols, pi_rows_c, pi_cols_c, rDist, cDist,
        n_init=2, eps=1e-6, **solver_opts, **cg_opts,
    )
    print(f"\n{rank=}, {losses[0]=}, {losses[-1]=}")

    # Evaluate the x/y pairing first, then each half on its own, in that order.
    evaluations = (
        (r"$\|x_i-y_j\|_2$", ldr.construct_xy_node_embedding_graph, (Z[:n], Z[n:])),
        (r"$\|x_i-x_j\|_2$", ldr.construct_node_embedding_graph, (Z[:n],)),
        (r"$\|y_i-y_j\|_2$", ldr.construct_node_embedding_graph, (Z[n:],)),
    )
    for label, build_graph, embeddings in evaluations:
        print(label)
        l_dar = build_graph(*embeddings, adjacency_list)
        _ = {'ratios': ldr.subopt_ratios(l_dar, Dist, sources, targets)}



Symmetric fit -> split -> asymmetric fit

rank/2=2.0, losses[0]=13.123036978815625, losses[-1]=0.0
median_stretch=366.7%, mean_stretch=655.5%
%[ratio<2] = 37.30%, %[ratio<1.2] = 21.10%, %[ratio=1.] = 19.50%

rank=4, losses[0]=3.4615732413023754, losses[-1]=0.0
$\|x_i-y_j\|_2$
median_stretch=250.0%, mean_stretch=515.7%
%[ratio<2] = 47.10%, %[ratio<1.2] = 30.90%, %[ratio=1.] = 30.30%


Asymmetric fit

rank=4, losses[0]=11.696765920545753, losses[-1]=0.6171095725940461
$\|x_i-y_j\|_2$
median_stretch=150.0%, mean_stretch=440.6%
%[ratio<2] = 62.00%, %[ratio<1.2] = 37.60%, %[ratio=1.] = 36.50%
$\|x_i-x_j\|_2$
median_stretch=333.3%, mean_stretch=592.3%
%[ratio<2] = 39.50%, %[ratio<1.2] = 26.50%, %[ratio=1.] = 26.10%
$\|y_i-y_j\|_2$
median_stretch=300.0%, mean_stretch=584.7%
%[ratio<2] = 36.90%, %[ratio<1.2] = 20.90%, %[ratio=1.] = 20.30%


Symmetric fit -> split -> asymmetric fit

rank/2=4.0, losses[0]=7.001875123337862, losses[-1]=0.8085108752535417
median_stretch=233.3%, mean_stretch=549.