diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt index 2a43c776..f272b808 100644 --- a/cpp_easygraph/CMakeLists.txt +++ b/cpp_easygraph/CMakeLists.txt @@ -41,4 +41,18 @@ endif() set_target_properties(cpp_easygraph PROPERTIES LINK_SEARCH_START_STATIC ON LINK_SEARCH_END_STATIC ON -) \ No newline at end of file +) + +find_package(Eigen3 QUIET) +if(Eigen3_FOUND) + message(STATUS "Found Eigen3: ${EIGEN3_INCLUDE_DIR}") + include_directories(${EIGEN3_INCLUDE_DIR}) + add_definitions(-DEIGEN_MAJOR_VERSION=${EIGEN3_VERSION_MAJOR}) +endif() + +# 启用OpenMP支持 +find_package(OpenMP) +if(OpenMP_CXX_FOUND) + message(STATUS "Found OpenMP: ${OpenMP_CXX_VERSION}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() \ No newline at end of file diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp index f5570d6e..487c0b1b 100644 --- a/cpp_easygraph/cpp_easygraph.cpp +++ b/cpp_easygraph/cpp_easygraph.cpp @@ -83,6 +83,7 @@ PYBIND11_MODULE(cpp_easygraph, m) { m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); + m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight"); m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6); m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none()); diff --git a/cpp_easygraph/functions/centrality/centrality.h b/cpp_easygraph/functions/centrality/centrality.h index 7040618a..121c9eb0 100644 --- a/cpp_easygraph/functions/centrality/centrality.h +++ b/cpp_easygraph/functions/centrality/centrality.h @@ -12,4 +12,11 @@ py::object cpp_katz_centrality( py::object py_max_iter, py::object py_tol, py::object py_normalized +); +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight ); \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp new file mode 100644 index 00000000..aa434621 --- /dev/null +++ b/cpp_easygraph/functions/centrality/eigenvector.cpp @@ -0,0 +1,416 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "centrality.h" +#include "../../classes/graph.h" + +#ifdef EIGEN_MAJOR_VERSION +#include +#include +#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 +#define HAVE_EIGEN +#endif +#endif + +namespace py = pybind11; + +class CSRMatrix { +public: + std::vector indptr; + std::vector indices; + std::vector data; + int rows, cols; + + CSRMatrix(int r, int c) : rows(r), cols(c) { + indptr.resize(r + 1, 0); + } + + void reserve(size_t nnz) { + indices.reserve(nnz); + data.reserve(nnz); + } + + std::vector multiply(const std::vector& vec) const { + std::vector result(rows, 0.0); + + #pragma omp parallel for if(rows > 1000) + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + result[i] += data[j] * vec[indices[j]]; + } + } + + return result; + } + + void multiply_inplace(const std::vector& vec, std::vector& result) const { + std::fill(result.begin(), result.end(), 0.0); + + #pragma omp parallel for if(rows > 1000) + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + result[i] += data[j] * vec[indices[j]]; + } + } + } + +#ifdef HAVE_EIGEN + Eigen::SparseMatrix to_eigen() const { + Eigen::SparseMatrix eigen_mat(rows, cols); + eigen_mat.reserve(Eigen::VectorXi::Constant(cols, 5)); + + for (int i = 0; i < rows; i++) { + for (int j = indptr[i]; j < indptr[i + 1]; j++) { + eigen_mat.insert(i, indices[j]) = data[j]; + } + } + eigen_mat.makeCompressed(); + return eigen_mat; + } +#endif +}; + +CSRMatrix build_transpose_matrix( + const Graph& graph, + const std::vector& nodes, + const std::string& weight_key +) { + int n = nodes.size(); + std::unordered_map node_to_idx; + for (size_t i = 0; i < nodes.size(); i++) { + node_to_idx[nodes[i]] = i; + } + + size_t nnz = 0; + for (node_t node_id : nodes) { + const auto& adj_it = graph.adj.find(node_id); + if (adj_it != graph.adj.end()) { + nnz += adj_it->second.size(); + } + } + + std::vector> edges; + edges.reserve(nnz); + + for (node_t src_id : nodes) { + int src_idx = node_to_idx[src_id]; + const auto& adj_it = graph.adj.find(src_id); + if (adj_it != graph.adj.end()) { + for (const auto& adj_pair : adj_it->second) { + node_t dst_id = adj_pair.first; + auto dst_it = node_to_idx.find(dst_id); + if (dst_it != node_to_idx.end()) { + int dst_idx = dst_it->second; + + double w = 1.0; + if (!weight_key.empty()) { + auto w_it = adj_pair.second.find(weight_key); + if (w_it != adj_pair.second.end()) { + w = w_it->second; + } + } + + edges.push_back(std::make_tuple(dst_idx, src_idx, w)); + } + } + } + } + + std::sort(edges.begin(), edges.end(), + [](const std::tuple& a, const std::tuple& b) { + if (std::get<0>(a) != std::get<0>(b)) { + return std::get<0>(a) < std::get<0>(b); + } + return std::get<1>(a) < std::get<1>(b); + }); + + CSRMatrix matrix(n, n); + matrix.reserve(edges.size()); + + for (const auto& edge : edges) { + int row = std::get<0>(edge); + matrix.indptr[row + 1]++; + } + + for (int i = 0; i < n; i++) { + matrix.indptr[i + 1] += matrix.indptr[i]; + } + + for (const auto& edge : edges) { + int row = std::get<0>(edge); + int col = std::get<1>(edge); + double val = std::get<2>(edge); + + matrix.indices.push_back(col); + matrix.data.push_back(val); + } + + std::vector has_connections(n, false); + + for (size_t i = 0; i < n; i++) { + if (matrix.indptr[i + 1] > matrix.indptr[i]) { + has_connections[i] = true; + } + } + + for (int i = 0; i < n; i++) { + if (!has_connections[i]) { + matrix.indices.push_back(i); + matrix.data.push_back(1.0e-4); + + for (int j = i + 1; j <= n; j++) { + matrix.indptr[j]++; + } + } + } + return matrix; +} + +std::vector power_iteration( + const CSRMatrix& A, + int max_iter, + double tol, + std::vector& x +) { + int n = A.rows; + std::vector x_next(n, 0.0); + + for (int iter = 0; iter < max_iter; iter++) { + A.multiply_inplace(x, x_next); + + double norm = 0.0; + #pragma omp parallel for reduction(+:norm) if(n > 10000) + for (int i = 0; i < n; i++) { + norm += x_next[i] * x_next[i]; + } + norm = std::sqrt(norm); + + if (norm < 1e-12) { + return x; + } + + #pragma omp parallel for if(n > 10000) + for (int i = 0; i < n; i++) { + x_next[i] /= norm; + } + + double diff = 0.0; + #pragma omp parallel for reduction(+:diff) if(n > 10000) + for (int i = 0; i < n; i++) { + diff += std::fabs(x_next[i] - x[i]); + } + + if (diff < n * tol) { + std::swap(x, x_next); + break; + } + + std::swap(x, x_next); + } + + return x; +} + +#ifdef HAVE_EIGEN +std::vector compute_eigenvector_eigen( + const CSRMatrix& A, + int max_iter, + double tol +) { + Eigen::SparseMatrix eigen_matrix = A.to_eigen(); + + int n = A.rows; + std::vector result(n); + + try { + Eigen::EigenSolver solver; + + if (n < 1000) { + Eigen::MatrixXd dense_matrix(eigen_matrix); + solver.compute(dense_matrix); + + int max_idx = 0; + double max_val = solver.eigenvalues()[0].real(); + for (int i = 1; i < n; i++) { + if (solver.eigenvalues()[i].real() > max_val) { + max_val = solver.eigenvalues()[i].real(); + max_idx = i; + } + } + + Eigen::VectorXd eigen_vec = solver.eigenvectors().col(max_idx).real(); + for (int i = 0; i < n; i++) { + result[i] = eigen_vec(i); + } + } else { + throw std::runtime_error("Matrix too large for dense solver"); + } + } + catch (const std::exception& e) { + std::vector x(n, 1.0/n); + return power_iteration(A, max_iter, tol, x); + } + + double sum = 0.0; + for (double val : result) { + sum += val; + } + + if (sum < 0.0) { + for (double& val : result) { + val = -val; + } + } + + double norm = 0.0; + for (double val : result) { + norm += val * val; + } + norm = std::sqrt(norm); + + for (double& val : result) { + val /= norm; + } + + return result; +} +#endif + +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight +) { + try { + Graph& graph = G.cast(); + int max_iter = py_max_iter.cast(); + double tol = py_tol.cast(); + std::string weight_key = ""; + if (!py_weight.is_none()) { + weight_key = py_weight.cast(); + } + + if (graph.node.size() == 0) { + return py::dict(); + } + + std::vector nodes; + for (auto& node_pair : graph.node) { + nodes.push_back(node_pair.first); + } + int n = nodes.size(); + + CSRMatrix A_transpose = build_transpose_matrix(graph, nodes, weight_key); + + std::vector isolated_nodes(n, true); + for (int i = 0; i < n; i++) { + // A node is isolated if it has zero edges (no entries in its row) + if (A_transpose.indptr[i + 1] == A_transpose.indptr[i]) { + isolated_nodes[i] = true; + } else { + isolated_nodes[i] = false; + } + } + + std::vector centrality; + + if (py_nstart.is_none()) { + bool fast_solver_success = false; + +#ifdef HAVE_EIGEN + try { + centrality = compute_eigenvector_eigen(A_transpose, max_iter, tol); + fast_solver_success = true; + } catch (const std::exception&) { + fast_solver_success = false; + } +#endif + + if (!fast_solver_success) { + std::vector x(n, 1.0/n); + centrality = power_iteration(A_transpose, max_iter, tol, x); + } + } else { + py::dict nstart = py_nstart.cast(); + std::vector x(n, 0.0); + + for (size_t i = 0; i < nodes.size(); i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + if (nstart.contains(node_obj)) { + x[i] = nstart[node_obj].cast(); + } else { + x[i] = 1.0; + } + } + + bool all_zeros = true; + for (double val : x) { + if (std::abs(val) > 1e-10) { + all_zeros = false; + break; + } + } + + if (all_zeros) { + throw std::runtime_error("initial vector cannot have all zero values"); + } + + double sum_abs = 0.0; + for (double val : x) { + sum_abs += std::fabs(val); + } + + for (double& val : x) { + val /= sum_abs; + } + + centrality = power_iteration(A_transpose, max_iter, tol, x); + } + + double sum = 0.0; + for (double val : centrality) { + sum += val; + } + + if (sum < 0.0) { + for (double& val : centrality) { + val = -val; + } + } + + for (int i = 0; i < n; i++) { + if (isolated_nodes[i]) { + centrality[i] = 0.0; + } + } + + double norm = 0.0; + for (double val : centrality) { + norm += val * val; + } + norm = std::sqrt(norm); + + if (norm > 0.0) { + for (double& val : centrality) { + val /= norm; + } + } + + py::dict result; + for (size_t i = 0; i < nodes.size(); i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + result[node_obj] = centrality[i]; + } + + return result; + } catch (const std::exception& e) { + throw std::runtime_error(e.what()); + } +} \ No newline at end of file diff --git a/easygraph/functions/centrality/__init__.py b/easygraph/functions/centrality/__init__.py index f37e55fe..bd2fec50 100644 --- a/easygraph/functions/centrality/__init__.py +++ b/easygraph/functions/centrality/__init__.py @@ -5,4 +5,5 @@ from .flowbetweenness import * from .laplacian import * from .pagerank import * -from .katz_centrality import * \ No newline at end of file +from .katz_centrality import * +from .eigenvector import * \ No newline at end of file diff --git a/easygraph/functions/centrality/eigenvector.py b/easygraph/functions/centrality/eigenvector.py new file mode 100644 index 00000000..2d7506d0 --- /dev/null +++ b/easygraph/functions/centrality/eigenvector.py @@ -0,0 +1,155 @@ +import math +import easygraph as eg +from easygraph.utils import * +from easygraph.utils.decorators import * +from scipy import sparse +from scipy.sparse import linalg +import numpy as np +from collections import defaultdict + +__all__ = ["eigenvector_centrality"] + +@not_implemented_for("multigraph") +@hybrid("cpp_eigenvector_centrality") +def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None): + """Calculate eigenvector centrality for nodes in the graph + + Eigenvector centrality is based on the idea that a node's importance + depends on the importance of its neighboring nodes. + Specifically, a node's centrality is proportional to the sum of + centrality values of its neighbors. + + Parameters + ---------- + G : graph object + An undirected or directed graph + + max_iter : int, optional (default=100) + Maximum number of iterations for the power method + + tol : float, optional (default=1.0e-6) + Convergence threshold; algorithm terminates when the difference + between centrality values in consecutive iterations is less than this value + + nstart : dictionary, optional (default=None) + Dictionary mapping nodes to initial centrality values + If None, the ARPACK solver is used to directly compute the eigenvector + + weight : string or None, optional (default=None) + Name of the edge attribute to be used as edge weight + If None, all edges are considered to have weight 1 + + Returns + ------- + centrality : dictionary + Dictionary mapping nodes to their eigenvector centrality values + + Raises + ------ + EasyGraphPointlessConcept + When input is an empty graph + + EasyGraphError + When the algorithm fails to converge within the specified maximum iterations + + Notes + ----- + This algorithm uses the power iteration method to find the principal eigenvector. + When nstart is not provided, the ARPACK solver is used for efficiency. + The returned centrality values are normalized. + """ + + if len(G) == 0: + raise eg.EasyGraphPointlessConcept( + "cannot compute centrality for the null graph" + ) + + if len(G) == 1: + raise eg.EasyGraphPointlessConcept( + "cannot compute eigenvector centrality for a single node graph" + ) + + + # Build node list and mapping + nodelist = list(G.nodes) + n = len(nodelist) + node_map = {node: i for i, node in enumerate(nodelist)} + + # Build weighted adjacency matrix + row, col, data = [], [], [] + for u in nodelist: + u_idx = node_map[u] + for v, attrs in G[u].items(): + if v in node_map: + v_idx = node_map[v] + w = attrs.get(weight, 1.0) if weight else 1.0 + # Build transpose matrix for centrality calculation + row.append(v_idx) + col.append(u_idx) + data.append(float(w)) + + # Create CSR format sparse matrix + A = sparse.csr_matrix((data, (row, col)), shape=(n, n)) + + # Detect and handle isolated nodes + row_sums = np.array(A.sum(axis=1)).flatten() + col_sums = np.array(A.sum(axis=0)).flatten() + isolated_nodes = np.where((row_sums == 0) & (col_sums == 0))[0] + + has_isolated = len(isolated_nodes) > 0 + isolated_indices = [] + + # Add small self-loops to isolated nodes for stability + if has_isolated: + # Store isolated node indices + isolated_indices = isolated_nodes.tolist() + + # Add small self-loop weights to isolated nodes + for idx in isolated_indices: + A[idx, idx] = 1.0e-4 # Small enough to not affect results, but maintains numerical stability + if nstart is not None: + # Use custom initial vector for power iteration + v = np.array([nstart.get(n, 1.0) for n in nodelist], dtype=float) + v = v / np.sum(np.abs(v)) + + # Power iteration method to compute principal eigenvector + v_last = np.zeros_like(v) + for _ in range(max_iter): + np.copyto(v_last, v) + v = A @ v_last # Sparse matrix multiplication + + norm = np.linalg.norm(v) + if norm < 1e-10: + v = v_last.copy() + break + v = v / norm # Normalization + + # Check convergence + if np.linalg.norm(v - v_last) < tol: + break + else: + raise eg.EasyGraphError(f"Eigenvector calculation did not converge in {max_iter} iterations") + + centrality = v + else: + # Use ARPACK solver to directly compute the principal eigenvector + eigenvalues, eigenvectors = linalg.eigs(A, k=1, which='LR', + maxiter=max_iter, tol=tol) + centrality = np.real(eigenvectors[:,0]) + + # Ensure positive results and normalize + if centrality.sum() < 0: + centrality = -centrality + + centrality = centrality / np.linalg.norm(centrality) + # Set centrality of isolated nodes to zero + if has_isolated: + for idx in isolated_indices: + centrality[idx] = 0.0 + # Renormalize if needed + if np.sum(centrality) > 0: + centrality = centrality / np.linalg.norm(centrality) + + # Return dictionary of node centrality values + return {nodelist[i]: float(centrality[i]) for i in range(n)} + diff --git a/easygraph/functions/centrality/tests/test_eigenvector.py b/easygraph/functions/centrality/tests/test_eigenvector.py new file mode 100644 index 00000000..1617b857 --- /dev/null +++ b/easygraph/functions/centrality/tests/test_eigenvector.py @@ -0,0 +1,119 @@ +import unittest +import numpy as np +import easygraph as eg +from easygraph.functions.centrality import eigenvector_centrality +from easygraph.utils.exception import EasyGraphPointlessConcept, EasyGraphNotImplemented + + +class Test_eigenvector(unittest.TestCase): + def setUp(self): + self.edges = [ + (1, 4), + (2, 4), + ("String", "Bool"), + (4, 1), + (0, 4), + (4, 256), + ((None, None), (None, None)), + ] + + self.test_graphs = [] + self.test_graphs.append(eg.classes.DiGraph(self.edges)) + + self.simple_graph = eg.Graph() + self.simple_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + self.directed_graph = eg.DiGraph() + self.directed_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + self.weighted_graph = eg.Graph() + self.weighted_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + + for u, v, data in self.weighted_graph.edges: + data["weight"] = 2.0 + + self.disconnected_graph = eg.Graph() + self.disconnected_graph.add_edges_from([(0, 1), (2, 3)]) + + self.isolated_node_graph = eg.Graph() + self.isolated_node_graph.add_edges_from([(0, 1), (1, 2)]) + self.isolated_node_graph.add_node(3) + + self.single_node_graph = eg.Graph() + self.single_node_graph.add_node(42) + + self.mixed_nodes_graph = eg.Graph() + self.mixed_nodes_graph.add_edges_from([(1, 2), ("X", "Y"), ((1, 2), (3, 4))]) + + def test_eigenvector(self): + + for G in self.test_graphs: + result = eigenvector_centrality(G) + self.assertIsInstance(result, dict) + self.assertEqual(len(result), len(G)) + + for v in result.values(): + self.assertIsInstance(v, float) + + def test_simple_graph(self): + result = eigenvector_centrality(self.simple_graph) + self.assertEqual(len(result), len(self.simple_graph)) + + for v in result.values(): + self.assertIsInstance(v, float) + + def test_directed_graph(self): + result = eigenvector_centrality(self.directed_graph) + self.assertEqual(len(result), len(self.directed_graph)) + + def test_weighted_graph(self): + result = eigenvector_centrality(self.weighted_graph, weight="weight") + self.assertEqual(len(result), len(self.weighted_graph)) + + def test_disconnected_graph(self): + + result = eigenvector_centrality(self.disconnected_graph) + self.assertEqual(len(result), len(self.disconnected_graph)) + + def test_isolated_nodes(self): + + result = eigenvector_centrality(self.isolated_node_graph) + self.assertEqual(len(result), len(self.isolated_node_graph)) + self.assertTrue(3 in result) + self.assertGreaterEqual(result[3], 0) + + def test_single_node_graph(self): + with self.assertRaises(EasyGraphPointlessConcept): + eigenvector_centrality(self.single_node_graph) + + def test_mixed_node_types(self): + result = eigenvector_centrality(self.mixed_nodes_graph) + self.assertEqual(len(result), len(self.mixed_nodes_graph)) + + def test_max_iter_parameter(self): + result = eigenvector_centrality(self.simple_graph, max_iter=50) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_tol_parameter(self): + result = eigenvector_centrality(self.simple_graph, tol=1.0e-3) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_nstart_parameter(self): + nstart = {node: 1.0 for node in self.simple_graph} + result = eigenvector_centrality(self.simple_graph, nstart=nstart) + self.assertEqual(len(result), len(self.simple_graph)) + + def test_multigraph_raises(self): + G = eg.MultiGraph() + G.add_edges_from([(0, 1), (0, 1)]) + with self.assertRaises(EasyGraphNotImplemented): + eigenvector_centrality(G) + + def test_empty_graph_raises(self): + G = eg.Graph() + with self.assertRaises(EasyGraphPointlessConcept): + eigenvector_centrality(G) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file