diff --git a/pygsp/graphs/graph.py b/pygsp/graphs/graph.py
index 1fc89c3a..d63feca3 100644
--- a/pygsp/graphs/graph.py
+++ b/pygsp/graphs/graph.py
@@ -560,11 +560,32 @@ def lmax(self):
             self.estimate_lmax()
         return self._lmax

+    def create_incidence_matrix(self):
+        r"""
+        Compute a new incidence matrix B and associated edge weight matrix Wb.
+
+        The combinatorial graph Laplacian can be recovered as L = B.T Wb B.
+        For convenience, the heads and tails of each edge are saved in two
+        additional attributes, start_nodes and end_nodes.
+        """
+        if self.is_directed() or not self.is_connected():
+            raise NotImplementedError('Focusing on connected undirected graphs first.')
+
+        start_nodes, end_nodes, weights = sparse.find(sparse.tril(self.W))
+
+        data = np.concatenate([np.ones_like(start_nodes), -np.ones_like(end_nodes)])
+        row = np.concatenate([np.arange(len(start_nodes)), np.arange(len(end_nodes))])
+        col = np.concatenate([start_nodes, end_nodes])
+
+        self.B = sparse.coo_matrix((data, (row, col)),
+                                   shape=(len(start_nodes), self.N)).tocsc()
+        self.Wb = sparse.diags(weights, 0)
+        self.start_nodes = start_nodes
+        self.end_nodes = end_nodes
+
     def estimate_lmax(self, recompute=False):
         r"""Estimate the Laplacian's largest eigenvalue (cached).

-        The result is cached and accessible by the :attr:`lmax` property.
-
         Exact value given by the eigendecomposition of the Laplacian, see
         :func:`compute_fourier_basis`. That estimation is much faster than
         the eigendecomposition.
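As a quick sanity check of the identity above, one can verify L = B.T Wb B on a small graph. This is an illustrative sketch, not part of the patch; it assumes a connected `graphs.Sensor` graph with the default combinatorial Laplacian:

>>> import numpy as np
>>> from pygsp import graphs
>>> G = graphs.Sensor(64)
>>> G.create_incidence_matrix()
>>> L = G.B.T.dot(G.Wb.dot(G.B))  # rebuild the Laplacian from B and Wb
>>> np.allclose(L.toarray(), G.L.toarray())
True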
diff --git a/pygsp/reduction.py b/pygsp/reduction.py
index b1ca942f..45c4421d 100644
--- a/pygsp/reduction.py
+++ b/pygsp/reduction.py
@@ -34,16 +34,25 @@ def _analysis(g, s, **kwargs):
     return s.swapaxes(1, 2).reshape(-1, s.shape[1], order='F')


-def graph_sparsify(M, epsilon, maxiter=10):
+def graph_sparsify(M, epsilon, maxiter=10, fast=True):
     r"""Sparsify a graph (with Spielman-Srivastava).

     Parameters
     ----------
     M : Graph or sparse matrix
         Graph structure or a Laplacian matrix
-    epsilon : int
+    epsilon : float
         Sparsification parameter
+    maxiter : int (optional)
+        Maximum number of successive attempts at reducing the sparsification
+        parameter to preserve connectivity. (default: 10)
+    fast : bool (optional)
+        Whether to use the fast, approximate resistance distances of
+        :cite:`spielman2011graph` or their exact values. (default: True)
+
     Returns
     -------
     Mnew : Graph or sparse matrix
@@ -53,6 +62,11 @@
     -----
     Epsilon should be between 1/sqrt(N) and 1.

+    The resistance distances computed with the `fast` option are approximate,
+    but that approximation is accounted for in the sparsification bounds of
+    the Spielman-Srivastava algorithm. Without this option, the distances are
+    computed by direct matrix inversion, which does not scale to large graphs.
+
     Examples
     --------
     >>> from pygsp import reduction
     >>>
@@ -70,34 +84,26 @@
     if isinstance(M, graphs.Graph):
         if not M.lap_type == 'combinatorial':
             raise NotImplementedError
-        L = M.L
+        g = M
+        g.create_incidence_matrix()
     else:
-        L = M
+        g = graphs.Graph(W=sparse.diags(M.diagonal()) - M,
+                         lap_type='combinatorial')
+        g.create_incidence_matrix()

-    N = np.shape(L)[0]
+    N = g.N

     if not 1./np.sqrt(N) <= epsilon < 1:
         raise ValueError('GRAPH_SPARSIFY: Epsilon out of required range')

-    # Not sparse
-    resistance_distances = utils.resistance_distance(L).toarray()
-    # Get the Weight matrix
-    if isinstance(M, graphs.Graph):
-        W = M.W
+    if fast:
+        Re = utils.approx_resistance_distance(g, epsilon)
     else:
-        W = np.diag(L.diagonal()) - L.toarray()
-        W[W < 1e-10] = 0
-
-    W = sparse.coo_matrix(W)
-    W.data[W.data < 1e-10] = 0
-    W = W.tocsc()
-    W.eliminate_zeros()
-
-    start_nodes, end_nodes, weights = sparse.find(sparse.tril(W))
+        Re = utils.resistance_distance(g.L).toarray()
+        Re = Re[g.start_nodes, g.end_nodes]

     # Calculate the new weights.
-    weights = np.maximum(0, weights)
-    Re = np.maximum(0, resistance_distances[start_nodes, end_nodes])
+    weights = np.maximum(0, g.Wb.diagonal())
+    Re = np.maximum(0, Re)
     Pe = weights * Re
     Pe = Pe / np.sum(Pe)
@@ -117,7 +123,7 @@
         counts[spin_counts[:, 0]] = spin_counts[:, 1]
         new_weights = counts * per_spin_weights

-        sparserW = sparse.csc_matrix((new_weights, (start_nodes, end_nodes)),
+        sparserW = sparse.csc_matrix((new_weights, (g.start_nodes, g.end_nodes)),
                                      shape=(N, N))
         sparserW = sparserW + sparserW.T
         sparserL = sparse.diags(sparserW.diagonal(), 0) - sparserW
@@ -276,7 +282,9 @@
             raise NotImplementedError('Unknown graph reduction method.')

         if sparsify and Gs[i+1].N > 2:
+            coords = Gs[i+1].coords
             Gs[i+1] = graph_sparsify(Gs[i+1], min(max(sparsify_eps,
                                                       2. / np.sqrt(Gs[i+1].N)), 1.))
+            Gs[i+1].coords = coords  # TODO: make in-place modifications instead!

         if compute_full_eigen:
@@ -293,7 +301,7 @@
     return Gs


-def kron_reduction(G, ind):
+def kron_reduction(G, ind, threshold=np.spacing(1)):
     r"""Compute the Kron reduction.

     This function performs the Kron reduction of the weight matrix in the
@@ -308,12 +316,22 @@
         Graph structure or weight matrix
     ind : list
         indices of the nodes to keep
+    threshold : float
+        Threshold applied to the reduced Laplacian matrix to remove numerical
+        noise. (default: machine precision)

     Returns
     -------
     Gnew : Graph or sparse matrix
         New graph structure or weight matrix

+    Notes
+    -----
+    For large graphs with the default threshold, the Kron reduction can
+    produce an extremely large number of edges, most of which have very small
+    weights. In this situation, a larger threshold removes most of these
+    unnecessary edges, an approximation that also makes any subsequent
+    sparsification much faster.
+
     References
     ----------
@@ -321,7 +339,6 @@
     """
     if isinstance(G, graphs.Graph):
-
         if G.lap_type != 'combinatorial':
             msg = 'Unknown reduction for {} Laplacian.'.format(G.lap_type)
             raise NotImplementedError(msg)
@@ -335,31 +352,27 @@
     else:
         L = G
-
     N = np.shape(L)[0]
     ind_comp = np.setdiff1d(np.arange(N, dtype=int), ind)

-    L_red = L[np.ix_(ind, ind)]
-    L_in_out = L[np.ix_(ind, ind_comp)]
-    L_out_in = L[np.ix_(ind_comp, ind)].tocsc()
-    L_comp = L[np.ix_(ind_comp, ind_comp)].tocsc()
+    L_red = utils.extract_submatrix(L, ind, ind)
+    L_in_out = utils.extract_submatrix(L, ind, ind_comp)
+    L_out_in = L_in_out.transpose().tocsc()
+    L_comp = utils.extract_submatrix(L, ind_comp, ind_comp).tocsc()

-    Lnew = L_red - L_in_out.dot(linalg.spsolve(L_comp, L_out_in))
+    Lnew = L_red - L_in_out.dot(utils.splu_inv_dot(L_comp, L_out_in))

-    # Make the laplacian symmetric if it is almost symmetric!
-    if np.abs(Lnew - Lnew.T).sum() < np.spacing(1) * np.abs(Lnew).sum():
-        Lnew = (Lnew + Lnew.T) / 2.
+    # Threshold exceedingly small values for stability.
+    Lnew = Lnew.tocoo()
+    Lnew.data[abs(Lnew.data) < threshold] = 0
+    Lnew = Lnew.tocsc()
+    Lnew.eliminate_zeros()
+
+    # Enforce a symmetric Laplacian.
+    Lnew = (Lnew + Lnew.T) / 2.

     if isinstance(G, graphs.Graph):
-        # Suppress the diagonal ? This is a good question?
         Wnew = sparse.diags(Lnew.diagonal(), 0) - Lnew
-        Snew = Lnew.diagonal() - np.ravel(Wnew.sum(0))
-        if np.linalg.norm(Snew, 2) >= np.spacing(1000):
-            Wnew = Wnew + sparse.diags(Snew, 0)
-
-        # Removing diagonal for stability
-        Wnew = Wnew - Wnew.diagonal()
-
         coords = G.coords[ind, :] if len(G.coords.shape) else np.ndarray(None)

         Gnew = graphs.Graph(W=Wnew, coords=coords, lap_type=G.lap_type,
                             plotting=G.plotting)
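A sketch of how the new `threshold` parameter of `kron_reduction` might be used to keep the reduced graph sparse. Illustrative only, not part of the patch; the value 1e-6 is an arbitrary example:

>>> import numpy as np
>>> from pygsp import graphs, reduction
>>> G = graphs.Sensor(256)
>>> ind = np.arange(0, G.N, 2)  # keep every other node
>>> Gk = reduction.kron_reduction(G, ind, threshold=1e-6)
>>> Gk.N
128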
diff --git a/pygsp/utils.py b/pygsp/utils.py
index d2124b44..106f0f41 100644
--- a/pygsp/utils.py
+++ b/pygsp/utils.py
@@ -211,6 +211,145 @@ def resistance_distance(G):
     return rd


+def approx_resistance_distance(g, epsilon):
+    r"""
+    Compute the resistance distance of each edge of a graph using the
+    Spielman-Srivastava algorithm.
+
+    Parameters
+    ----------
+    g : Graph
+        Graph structure
+    epsilon : float
+        Sparsification parameter
+
+    Returns
+    -------
+    rd : ndarray
+        Resistance distance for every edge in the graph
+
+    Examples
+    --------
+    >>>
+    >>>
+    >>>
+
+    Notes
+    -----
+    This implementation avoids the direct matrix inversion needed by the
+    exact resistance distance and can scale to very large graphs. The
+    approximation error is included in the budget of the Spielman-Srivastava
+    sparsification algorithm.
+
+    References
+    ----------
+    :cite:`klein1993resistance` :cite:`spielman2011graph`
+
+    """
+    g.create_incidence_matrix()
+    n = g.N
+    k = 24 * np.log(n / epsilon)
+    Q = ((np.random.rand(int(k), g.Wb.shape[0]) > 0.5) * 2. - 1) / np.sqrt(k)
+    Y = sparse.csc_matrix(Q).dot(np.sqrt(g.Wb).dot(g.B))
+
+    r = splu_inv_dot(g.L, Y.T)
+
+    return ((r[g.start_nodes] - r[g.end_nodes]).toarray()**2).sum(axis=1)
+
+
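An illustrative usage sketch for approx_resistance_distance, not part of the patch; since the estimate is randomized, only the output shape is checked:

>>> import numpy as np
>>> from pygsp import graphs, utils
>>> G = graphs.Sensor(64)
>>> rd = utils.approx_resistance_distance(G, epsilon=0.5)
>>> rd.shape == G.start_nodes.shape  # one distance per edge
True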
+def extract_submatrix(M, ind_rows, ind_cols):
+    r"""
+    Extract a block of specific rows and columns from a sparse matrix.
+
+    Parameters
+    ----------
+    M : sparse matrix
+        Input matrix
+    ind_rows : ndarray
+        Indices of the rows to extract
+    ind_cols : ndarray
+        Indices of the columns to extract
+
+    Returns
+    -------
+    sub_M : sparse matrix
+        Submatrix obtained from M by keeping only the requested rows and
+        columns
+
+    Examples
+    --------
+    >>> import scipy.sparse as sparse
+    >>> from pygsp import utils
+    >>> # Extract the first diagonal block from a sparse matrix.
+    >>> M = sparse.csc_matrix((16, 16))
+    >>> ind_row = range(8); ind_col = range(8)
+    >>> block = utils.extract_submatrix(M, ind_row, ind_col)
+    >>> block.shape
+    (8, 8)
+
+    """
+    M = M.tocoo()
+
+    # Find the elements belonging to the submatrix.
+    m = np.in1d(M.row, ind_rows) & np.in1d(M.col, ind_cols)
+    n_elem = m.sum()
+
+    # Compute the new row and column indices. The concatenation with ind_rows
+    # and ind_cols accounts for rows or columns without any element, which
+    # would otherwise foil this np.unique trick.
+    _, row = np.unique(np.concatenate([M.row[m], ind_rows]), return_inverse=True)
+    _, col = np.unique(np.concatenate([M.col[m], ind_cols]), return_inverse=True)
+
+    return sparse.coo_matrix((M.data[m], (row[:n_elem], col[:n_elem])),
+                             shape=(len(ind_rows), len(ind_cols)), copy=True)
+
+
+def splu_inv_dot(A, B, threshold=np.spacing(1)):
+    """
+    Compute A^{-1} B for a sparse matrix A, assuming A is Symmetric
+    Diagonally Dominant (SDD).
+
+    Parameters
+    ----------
+    A : sparse matrix
+        Input SDD matrix to invert, in CSC or CSR form.
+    B : sparse matrix
+        Matrix or vector of the right-hand side
+    threshold : float, optional
+        Threshold applied to the result to remove numerical noise before
+        conversion to sparse format. (default: machine precision)
+
+    Returns
+    -------
+    res : sparse matrix
+        Result of A^{-1} B
+
+    Notes
+    -----
+    This inversion by sparse linear system solving is optimized for SDD
+    matrices such as graph Laplacians. Note that B is converted to a dense
+    matrix before being sent to splu, which is computationally more efficient
+    but can lead to very large memory usage if B is large.
+    """
+    # Compute the LU decomposition of A.
+    lu = sparse.linalg.splu(A,
+                            diag_pivot_thresh=A.diagonal().min() * 0.5,
+                            permc_spec='MMD_AT_PLUS_A',
+                            options={'SymmetricMode': True})
+
+    res = lu.solve(B.toarray())
+
+    # Threshold the result to remove numerical noise.
+    res[abs(res) < threshold] = 0
+
+    # Convert back to a sparse matrix.
+    res = sparse.csc_matrix(res)
+
+    return res
+
+
 def symmetrize(W, method='average'):
     r"""
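An illustrative sketch of splu_inv_dot on a toy SDD system, not part of the patch; the 3x3 tridiagonal matrix is an arbitrary example:

>>> import numpy as np
>>> import scipy.sparse as sparse
>>> from pygsp import utils
>>> A = sparse.csc_matrix(np.array([[2., -1., 0.],
...                                 [-1., 2., -1.],
...                                 [0., -1., 2.]]))
>>> B = sparse.csc_matrix(np.ones((3, 1)))
>>> X = utils.splu_inv_dot(A, B)
>>> np.allclose(A.dot(X).toarray(), B.toarray())  # X solves A X = B
True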