From 4502ee145d6abcd1ccb5724d2a02167619dc0823 Mon Sep 17 00:00:00 2001 From: EiffL Date: Sat, 3 Jun 2017 23:21:26 -0400 Subject: [PATCH 1/8] Added approximate resistance distance and faster kron reduction --- pygsp/graphs/graph.py | 24 ++++++++++++ pygsp/operators/reduction.py | 64 ++++++++++++------------------ pygsp/utils.py | 75 ++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 39 deletions(-) diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index 2c9446ef..d954ba1f 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -576,6 +576,30 @@ def create_laplacian(self, lap_type='combinatorial'): self.L = L + def create_incidence_matrix(self): + r""" + Compute a new incidence matrix B and associated edge weight matrix We + + The combinatorial graph laplacian can be recovered with L = B.T W B + """ + if not hasattr(self, 'directed'): + self.is_directed() + + if self.directed or not self.connected: + raise NotImplementedError('Focusing on connected non directed graphs first.') + + start_nodes, end_nodes, weights = sparse.find(sparse.tril(self.W)) + + data = np.concatenate([np.ones(self.Ne/2), -np.ones(self.Ne/2)]) + row = np.concatenate([np.arange(self.Ne/2), np.arange(self.Ne/2)]) + col = np.concatenate([start_nodes, end_nodes]) + + self.B = sparse.coo_matrix((data, (row, col)), + shape=(self.Ne/2, self.N) ).tocsc() + self.Wb = sparse.diags(weights,0) + self.start_nodes = start_nodes + self.end_nodes = end_nodes + def estimate_lmax(self, force_recompute=False): r""" Estimate the maximal eigenvalue. diff --git a/pygsp/operators/reduction.py b/pygsp/operators/reduction.py index f72eb32f..98e47a94 100644 --- a/pygsp/operators/reduction.py +++ b/pygsp/operators/reduction.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- r"""This module contains functionalities for the reduction of graphs' vertex set while keeping the graph structure.""" -from ..utils import resistance_distance, build_logger +from ..utils import resistance_distance, build_logger, extract_submatrix, splu_inv_dot, approx_resistance_distance from ..graphs import Graph from ..filters import Filter @@ -12,7 +12,7 @@ logger = build_logger(__name__) -def graph_sparsify(M, epsilon, maxiter=10): +def graph_sparsify(M, epsilon, maxiter=10, fast=True): r""" Sparsify a graph using Spielman-Srivastava algorithm. @@ -23,6 +23,10 @@ def graph_sparsify(M, epsilon, maxiter=10): epsilon : int Sparsification parameter + fast : bool + Whether to use the fast resistance distance from :cite:`spielman2011graph` + or exact value + Returns ------- Mnew : Graph or sparse matrix @@ -49,35 +53,26 @@ def graph_sparsify(M, epsilon, maxiter=10): if isinstance(M, Graph): if not M.lap_type == 'combinatorial': raise NotImplementedError - L = M.L + g = M + g.create_incidence_matrix() else: - L = M + g = Graph(W=sparse.diags(M.diagonal()) - M, lap_type='combinatorial') + g.create_incidence_matrix() - N = np.shape(L)[0] + N = g.N if not 1./np.sqrt(N) <= epsilon < 1: raise ValueError('GRAPH_SPARSIFY: Epsilon out of required range') - # Not sparse - resistance_distances = resistance_distance(L).toarray() - # Get the Weight matrix - if isinstance(M, Graph): - W = M.W + if fast: + Re = approx_resistance_distance(g, epsilon) else: - W = np.diag(L.diagonal()) - L.toarray() - W[W < 1e-10] = 0 - - W = sparse.coo_matrix(W) - W.data[W.data < 1e-10] = 0 - W = W.tocsc() - W.eliminate_zeros() - - - start_nodes, end_nodes, weights = sparse.find(sparse.tril(W)) + Re = resistance_distance(g.L).toarray() + Re = Re[g.start_nodes, g.end_nodes] # Calculate the new weights. - weights = np.maximum(0, weights) - Re = np.maximum(0, resistance_distances[start_nodes, end_nodes]) + weights = np.maximum(0, g.Wb.diagonal()) + Re = np.maximum(0, Re) Pe = weights * Re Pe = Pe / np.sum(Pe) @@ -97,7 +92,7 @@ def graph_sparsify(M, epsilon, maxiter=10): counts[spin_counts[:, 0]] = spin_counts[:, 1] new_weights = counts * per_spin_weights - sparserW = sparse.csc_matrix((new_weights, (start_nodes, end_nodes)), + sparserW = sparse.csc_matrix((new_weights, (g.start_nodes, g.end_nodes)), shape=(N, N)) sparserW = sparserW + sparserW.T sparserL = sparse.diags(sparserW.diagonal(), 0) - sparserW @@ -327,27 +322,18 @@ def kron_reduction(G, ind): N = np.shape(L)[0] ind_comp = np.setdiff1d(np.arange(N, dtype=int), ind) - L_red = L[np.ix_(ind, ind)] - L_in_out = L[np.ix_(ind, ind_comp)] - L_out_in = L[np.ix_(ind_comp, ind)].tocsc() - L_comp = L[np.ix_(ind_comp, ind_comp)].tocsc() + L_red = extract_submatrix(L,ind, ind) + L_in_out = extract_submatrix(L, ind, ind_comp) + L_out_in = L_in_out.transpose().tocsc() + L_comp = extract_submatrix(L,ind_comp, ind_comp).tocsc() - Lnew = L_red - L_in_out.dot(spsolve(L_comp, L_out_in)) + Lnew = L_red - L_in_out.dot(splu_inv_dot(L_comp, L_out_in)) - # Make the laplacian symmetric if it is almost symmetric! - if np.abs(Lnew - Lnew.T).sum() < np.spacing(1) * np.abs(Lnew).sum(): - Lnew = (Lnew + Lnew.T) / 2. + # Enforces symmetric Laplacian + Lnew = (Lnew + Lnew.T) / 2. if isinstance(G, Graph): - # Suppress the diagonal ? This is a good question? Wnew = sparse.diags(Lnew.diagonal(), 0) - Lnew - Snew = Lnew.diagonal() - np.ravel(Wnew.sum(0)) - if np.linalg.norm(Snew, 2) >= np.spacing(1000): - Wnew = Wnew + sparse.diags(Snew, 0) - - # Removing diagonal for stability - Wnew = Wnew - Wnew.diagonal() - coords = G.coords[ind, :] if len(G.coords.shape) else np.ndarray(None) Gnew = Graph(W=Wnew, coords=coords, lap_type=G.lap_type, plotting=G.plotting, gtype='Kron reduction') diff --git a/pygsp/utils.py b/pygsp/utils.py index 9b4611e2..f407a72e 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -182,3 +182,78 @@ def resistance_distance(M): # 1 call dans operators.reduction - pseudo - pseudo.T return rd + +def approx_resistance_distance(g, epsilon): + """ + Computes the resistance distance using the ST algorithm + """ + g.create_incidence_matrix() + n = g.N + k = 24 * np.log( n / epsilon) + Q = ((np.random.rand(int(k),g.Wb.shape[0]) > 0.5)*2. -1)/np.sqrt(k) + Y = sparse.csc_matrix(Q).dot(np.sqrt(g.Wb).dot(g.B)) + + r = splu_inv_dot(g.L, Y.T) + + return ((r[g.start_nodes] - r[g.end_nodes]).toarray()**2).sum(axis=1) + +def extract_submatrix(M, ind_rows, ind_cols): + r""" + Extract a bloc from the provided sparse matrix + + Parameters + ---------- + + M : sparse matrix + Input matrix + + ind_rows: ndarray + Indices of rows to extract + + ind_cols: ndarray + Indices of columns to extract + + Returns + ------- + + sub_M: sparse matrix + Submatrix obtained from M keeping only the requested rows and columns + + """ + M = M.tocoo() + + # Finding elements of the sub-matrix + m = np.in1d(M.row, ind_rows) & np.in1d(M.col, ind_cols) + n_elem = m.sum() + + # Finding new rows and column indices + # The concatenation with ind and ind_comp is there to account for the fact that some rows + # or columns may not have elements in them, which foils this np.unique trick + _, row = np.unique(np.concatenate([M.row[m], ind_rows]), return_inverse=True) + _, col = np.unique(np.concatenate([M.col[m], ind_cols]), return_inverse=True) + + return sparse.coo_matrix((M.data[m], (row[:n_elem],col[:n_elem])), + shape=(len(ind_rows),len(ind_cols)),copy=True) + + +def splu_inv_dot(A, B, threshold=np.spacing(1)): + """ + Compute A^{-1}B for sparse matrices A and B, assuming A is SDD, + using superLU + """ + s = A.shape + # Compute the LU decomposition of A + lu = sparse.linalg.splu(A, + diag_pivot_thresh=A.diagonal().min()*0.5, + permc_spec='MMD_AT_PLUS_A', + options={'SymmetricMode':True}) + + res = lu.solve(B.toarray()) + + # Threshold the result to remove numerical noise + res[abs(res) < threshold] = 0 + + # Convert to sparse matrix + res = sparse.csc_matrix(res) + + return res From df029a27009cc255d2ddd25d6da42ce69b28d3ff Mon Sep 17 00:00:00 2001 From: EiffL Date: Sun, 4 Jun 2017 14:32:55 -0400 Subject: [PATCH 2/8] Added documentation --- pygsp/graphs/graph.py | 6 ++- pygsp/operators/reduction.py | 32 ++++++++++++++-- pygsp/utils.py | 74 +++++++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 11 deletions(-) diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index d954ba1f..49feb1cd 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -578,9 +578,11 @@ def create_laplacian(self, lap_type='combinatorial'): def create_incidence_matrix(self): r""" - Compute a new incidence matrix B and associated edge weight matrix We + Compute a new incidence matrix B and associated edge weight matrix Wb. - The combinatorial graph laplacian can be recovered with L = B.T W B + The combinatorial graph laplacian can be recovered with L = B.T Wb B + For convenience, the heads and tails of each edge are saved in two + additional attributes start_nodes and end_nodes. """ if not hasattr(self, 'directed'): self.is_directed() diff --git a/pygsp/operators/reduction.py b/pygsp/operators/reduction.py index 98e47a94..88747cd6 100644 --- a/pygsp/operators/reduction.py +++ b/pygsp/operators/reduction.py @@ -20,12 +20,17 @@ def graph_sparsify(M, epsilon, maxiter=10, fast=True): ---------- M : Graph or sparse matrix Graph structure or a Laplacian matrix - epsilon : int + + epsilon : float Sparsification parameter + maxiter : int (optional) + Number of iterations in successive attempts at reducing the sparsification + parameter to preserve connectivity. (default: 10) + fast : bool Whether to use the fast resistance distance from :cite:`spielman2011graph` - or exact value + or exact value. (default: True) Returns ------- @@ -36,6 +41,11 @@ def graph_sparsify(M, epsilon, maxiter=10, fast=True): ----- Epsilon should be between 1/sqrt(N) and 1 + The resistance distances computed by the `fast` option are approximate but + that approximation is included in the graph sparsification bounds of the + Spielman-Srivastava algorithm. Without this option, distances are computed + by blunt matrix inversion which does not scale for large graphs. + Examples -------- >>> from pygsp import graphs, operators @@ -274,7 +284,7 @@ def graph_multiresolution(G, levels, **kwargs): return Gs -def kron_reduction(G, ind): +def kron_reduction(G, ind, threshold=np.spacing(1)): r""" Compute the kron reduction. @@ -290,12 +300,22 @@ def kron_reduction(G, ind): Graph structure or weight matrix ind : list indices of the nodes to keep + threshold: float + Threshold applied to the reduced Laplacian matrix to remove numerical + noise. (default: marchine precision) Returns ------- Gnew : Graph or sparse matrix New graph structure or weight matrix + Notes + ----- + For large graphs, with default thresholding value, the kron reduction can + lead to an extremely large number of edges, most of which have very small + weight. In this situation, a larger thresholding can remove most of these + unnecessary edges, an approximation that also makes subsequent spasrsification + much faster. References ---------- @@ -329,6 +349,12 @@ def kron_reduction(G, ind): Lnew = L_red - L_in_out.dot(splu_inv_dot(L_comp, L_out_in)) + # Threshold excedingly small values for stability + Lnew = Lnew.tocoo() + Lnew.data[abs(Lnew.data) < threshold] = 0 + Lnew = Lnew.tocsc() + Lnew.eliminate_zeros() + # Enforces symmetric Laplacian Lnew = (Lnew + Lnew.T) / 2. diff --git a/pygsp/utils.py b/pygsp/utils.py index f407a72e..d50147d9 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -184,8 +184,39 @@ def resistance_distance(M): # 1 call dans operators.reduction return rd def approx_resistance_distance(g, epsilon): - """ - Computes the resistance distance using the ST algorithm + r""" + Compute the resistance distances of each edge of a graph using the + Spielman-Srivastava algorithm. + + Parameters + ---------- + g : Graph + Graph structure + + epsilon: float + Sparsification parameter + + Returns + ------- + rd : ndarray + distance for every edge in the graph + + Examples + -------- + >>> + >>> + >>> + + Notes + ----- + This implementation avoids the blunt matrix inversion of the exact distance + distance and can scale to very large graphs. The approximation error is + included in the budget of Spielman-Srivastava sparsification algorithm. + + References + ---------- + :cite:`klein1993resistance` :cite:`spielman2011graph` + """ g.create_incidence_matrix() n = g.N @@ -199,7 +230,7 @@ def approx_resistance_distance(g, epsilon): def extract_submatrix(M, ind_rows, ind_cols): r""" - Extract a bloc from the provided sparse matrix + Extract a bloc of specific rows and columns from a sparse matrix. Parameters ---------- @@ -219,6 +250,14 @@ def extract_submatrix(M, ind_rows, ind_cols): sub_M: sparse matrix Submatrix obtained from M keeping only the requested rows and columns + Examples + -------- + >>> # Extracting first diagonal block from a sparse matrix + >>> M = sparse.csc_matrix((16, 16)) + >>> ind_row = range(8); ind_col = range(8) + >>> block = extract_submatrix(M, ind_row, ind_col) + >>> block.shape + (8, 8) """ M = M.tocoo() @@ -238,10 +277,33 @@ def extract_submatrix(M, ind_rows, ind_cols): def splu_inv_dot(A, B, threshold=np.spacing(1)): """ - Compute A^{-1}B for sparse matrices A and B, assuming A is SDD, - using superLU + Compute A^{-1}B for sparse matrix A assuming A is Symmetric Diagonally + Dominant (SDD). + + Parameters + ---------- + A : sparse matrix + Input SDD matrix to invert, in CSC or CSR form. + + B : sparse matrix + Matrix or vector of the right hand side + + threshold: float, optional + Threshold to apply to result as to remove numerical noise before + conversion to sparse format. (default: machine precision) + + Returns + ------- + res: sparse matrix + Result of A^{-1}B + + Notes + ----- + This inversion by sparse linear system solving is optimized for SDD matrices + such as Graph Laplacians. Note that B is converted to a dense matrix before + being sent to splu, which is more computationally efficient but can lead to + very large memory usage if B is large. """ - s = A.shape # Compute the LU decomposition of A lu = sparse.linalg.splu(A, diag_pivot_thresh=A.diagonal().min()*0.5, From 570921369af48c4b207de182b478f6b67f676292 Mon Sep 17 00:00:00 2001 From: EiffL Date: Sun, 4 Jun 2017 14:36:36 -0400 Subject: [PATCH 3/8] Added to documentation --- doc/reference/utils.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/reference/utils.rst b/doc/reference/utils.rst index 39a85fe6..1d334131 100644 --- a/doc/reference/utils.rst +++ b/doc/reference/utils.rst @@ -27,3 +27,13 @@ Distanz Resistance distance ------------------- .. autofunction:: pygsp.utils.resistance_distance + +.. autofunction:: pygsp.utils.approx_resistance_distance + +SDD optimized sparse linear system solver +----------------------------------------- +.. autofunction:: pygsp.utils.splu_inv_dot + +Sparse matrix block extraction +------------------------------ +.. autofunction:: pygsp.utils.extract_submatrix From 3234d6651572065363b7025c741e05e47e026e3b Mon Sep 17 00:00:00 2001 From: Francois Date: Thu, 15 Jun 2017 15:05:08 -0400 Subject: [PATCH 4/8] Fixed example for doctest --- pygsp/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygsp/utils.py b/pygsp/utils.py index d50147d9..3c8196eb 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -252,10 +252,12 @@ def extract_submatrix(M, ind_rows, ind_cols): Examples -------- + >>> import scipy.sparse as sparse + >>> from pygsp import utils >>> # Extracting first diagonal block from a sparse matrix >>> M = sparse.csc_matrix((16, 16)) >>> ind_row = range(8); ind_col = range(8) - >>> block = extract_submatrix(M, ind_row, ind_col) + >>> block = utils.extract_submatrix(M, ind_row, ind_col) >>> block.shape (8, 8) """ From df13afe698a1c626df76a830ec2da3fb96d03fab Mon Sep 17 00:00:00 2001 From: Francois Date: Thu, 15 Jun 2017 16:12:37 -0400 Subject: [PATCH 5/8] Fixed example for doctest --- pygsp/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygsp/utils.py b/pygsp/utils.py index 3c8196eb..18bd63a0 100644 --- a/pygsp/utils.py +++ b/pygsp/utils.py @@ -260,6 +260,7 @@ def extract_submatrix(M, ind_rows, ind_cols): >>> block = utils.extract_submatrix(M, ind_row, ind_col) >>> block.shape (8, 8) + """ M = M.tocoo() From 548c35731debce56c9bda47409c4cc9bd1d7fde7 Mon Sep 17 00:00:00 2001 From: Francois Date: Thu, 19 Apr 2018 18:12:43 -0400 Subject: [PATCH 6/8] Fixes bug preventing multiresolution --- pygsp/reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsp/reduction.py b/pygsp/reduction.py index b1ca942f..8f530156 100644 --- a/pygsp/reduction.py +++ b/pygsp/reduction.py @@ -135,7 +135,7 @@ def graph_sparsify(M, epsilon, maxiter=10): sparserW = (sparserW + sparserW.T) / 2. Mnew = graphs.Graph(W=sparserW) - #M.copy_graph_attributes(Mnew) + M.copy_graph_attributes(Mnew) else: Mnew = sparse.lil_matrix(sparserL) From 9ecacf073317f950d162c075ef1ab9299de6ea95 Mon Sep 17 00:00:00 2001 From: Francois Date: Thu, 19 Apr 2018 18:16:50 -0400 Subject: [PATCH 7/8] fix for mr --- pygsp/reduction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygsp/reduction.py b/pygsp/reduction.py index 8f530156..7b5ad50f 100644 --- a/pygsp/reduction.py +++ b/pygsp/reduction.py @@ -135,7 +135,7 @@ def graph_sparsify(M, epsilon, maxiter=10): sparserW = (sparserW + sparserW.T) / 2. Mnew = graphs.Graph(W=sparserW) - M.copy_graph_attributes(Mnew) + #M.copy_graph_attributes(Mnew) else: Mnew = sparse.lil_matrix(sparserL) @@ -276,7 +276,9 @@ def graph_multiresolution(G, levels, sparsify=True, sparsify_eps=None, raise NotImplementedError('Unknown graph reduction method.') if sparsify and Gs[i+1].N > 2: + coords = Gs[i+1].coords Gs[i+1] = graph_sparsify(Gs[i+1], min(max(sparsify_eps, 2. / np.sqrt(Gs[i+1].N)), 1.)) + Gs[i+1].coords = coords # TODO : Make in place modifications instead! if compute_full_eigen: From bf2183e29c17e28bbdd2595d629e3a445340649b Mon Sep 17 00:00:00 2001 From: EiffL Date: Thu, 19 Apr 2018 21:00:19 -0400 Subject: [PATCH 8/8] Fixes update issues --- pygsp/graphs/graph.py | 11 ++++------- pygsp/reduction.py | 8 +++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py index c6fd5397..d63feca3 100644 --- a/pygsp/graphs/graph.py +++ b/pygsp/graphs/graph.py @@ -568,20 +568,17 @@ def create_incidence_matrix(self): For convenience, the heads and tails of each edge are saved in two additional attributes start_nodes and end_nodes. """ - if not hasattr(self, 'directed'): - self.is_directed() - - if self.directed or not self.connected: + if self.is_directed() or not self.is_connected(): raise NotImplementedError('Focusing on connected non directed graphs first.') start_nodes, end_nodes, weights = sparse.find(sparse.tril(self.W)) - data = np.concatenate([np.ones(self.Ne/2), -np.ones(self.Ne/2)]) - row = np.concatenate([np.arange(self.Ne/2), np.arange(self.Ne/2)]) + data = np.concatenate([np.ones_like(start_nodes), -np.ones_like(end_nodes)]) + row = np.concatenate([np.arange(len(start_nodes)), np.arange(len(end_nodes))]) col = np.concatenate([start_nodes, end_nodes]) self.B = sparse.coo_matrix((data, (row, col)), - shape=(self.Ne/2, self.N) ).tocsc() + shape=(len(start_nodes), self.N) ).tocsc() self.Wb = sparse.diags(weights,0) self.start_nodes = start_nodes self.end_nodes = end_nodes diff --git a/pygsp/reduction.py b/pygsp/reduction.py index 294d1730..45c4421d 100644 --- a/pygsp/reduction.py +++ b/pygsp/reduction.py @@ -339,7 +339,6 @@ def kron_reduction(G, ind, threshold=np.spacing(1)): """ if isinstance(G, graphs.Graph): - if G.lap_type != 'combinatorial': msg = 'Unknown reduction for {} Laplacian.'.format(G.lap_type) raise NotImplementedError(msg) @@ -353,14 +352,13 @@ def kron_reduction(G, ind, threshold=np.spacing(1)): else: L = G - N = np.shape(L)[0] ind_comp = np.setdiff1d(np.arange(N, dtype=int), ind) - L_red = extract_submatrix(L,ind, ind) - L_in_out = extract_submatrix(L, ind, ind_comp) + L_red = utils.extract_submatrix(L,ind, ind) + L_in_out = utils.extract_submatrix(L, ind, ind_comp) L_out_in = L_in_out.transpose().tocsc() - L_comp = extract_submatrix(L,ind_comp, ind_comp).tocsc() + L_comp = utils.extract_submatrix(L,ind_comp, ind_comp).tocsc() Lnew = L_red - L_in_out.dot(utils.splu_inv_dot(L_comp, L_out_in))