diff --git a/CMakeLists.txt b/CMakeLists.txt index 20a3809b..ca28feb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,22 +2,18 @@ cmake_minimum_required(VERSION 3.23) project(easygraph) +option(EASYGRAPH_ENABLE_OPENMP "Enable OpenMP acceleration (auto-detect)" ON) + option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF) add_subdirectory(cpp_easygraph) if (EASYGRAPH_ENABLE_GPU) - message("easygraph gpu module is enabled") - add_subdirectory(gpu_easygraph) - target_include_directories(cpp_easygraph PRIVATE gpu_easygraph ) - else() - message("easygraph gpu module is disabled") - -endif() \ No newline at end of file +endif() diff --git a/cpp_easygraph/CMakeLists.txt b/cpp_easygraph/CMakeLists.txt index 2a43c776..ae15d7c9 100644 --- a/cpp_easygraph/CMakeLists.txt +++ b/cpp_easygraph/CMakeLists.txt @@ -11,6 +11,7 @@ file(GLOB SOURCES add_subdirectory(pybind11) +option(EASYGRAPH_ENABLE_OPENMP "Enable OpenMP acceleration (auto-detect)" ON) option(EASYGRAPH_ENABLE_GPU "EASYGRAPH_ENABLE_GPU" OFF) if (EASYGRAPH_ENABLE_GPU) @@ -22,7 +23,7 @@ if (EASYGRAPH_ENABLE_GPU) set_property(TARGET cpp_easygraph PROPERTY CUDA_ARCHITECTURES all) - target_compile_definitions(cpp_easygraph + target_compile_definitions(cpp_easygraph PRIVATE EASYGRAPH_ENABLE_GPU ) @@ -31,14 +32,52 @@ if (EASYGRAPH_ENABLE_GPU) ) else() - pybind11_add_module(cpp_easygraph ${SOURCES} ) endif() -set_target_properties(cpp_easygraph PROPERTIES - LINK_SEARCH_START_STATIC ON - LINK_SEARCH_END_STATIC ON -) \ No newline at end of file +if (EASYGRAPH_ENABLE_OPENMP) + if (APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + find_path(EG_LIBOMP_INCLUDE_DIR + NAMES omp.h + PATHS + /opt/homebrew/opt/libomp/include + /usr/local/opt/libomp/include + ) + + find_library(EG_LIBOMP_LIBRARY + NAMES omp libomp + PATHS + /opt/homebrew/opt/libomp/lib + /usr/local/opt/libomp/lib + ) + + if (EG_LIBOMP_INCLUDE_DIR AND EG_LIBOMP_LIBRARY) + message(STATUS "libomp found: ${EG_LIBOMP_LIBRARY}") + + set(OpenMP_C_FLAGS "-Xpreprocessor 
-fopenmp" CACHE STRING "" FORCE) + set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp" CACHE STRING "" FORCE) + + set(OpenMP_CXX_INCLUDE_DIR "${EG_LIBOMP_INCLUDE_DIR}" CACHE PATH "" FORCE) + set(OpenMP_omp_LIBRARY "${EG_LIBOMP_LIBRARY}" CACHE FILEPATH "" FORCE) + else() + message(STATUS + "libomp not found on macOS. OpenMP will be disabled.\n" + "To enable OpenMP, run: brew install libomp" + ) + endif() + endif() + + find_package(OpenMP QUIET) +endif() + +if (OpenMP_CXX_FOUND) + message(STATUS "OpenMP found, enabling parallel acceleration.") + target_link_libraries(cpp_easygraph PRIVATE OpenMP::OpenMP_CXX) + target_compile_definitions(cpp_easygraph PRIVATE EASYGRAPH_USE_OPENMP=1) +else() + message(STATUS "OpenMP not found, building in single-thread mode.") + target_compile_definitions(cpp_easygraph PRIVATE EASYGRAPH_USE_OPENMP=0) +endif() \ No newline at end of file diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp index f5570d6e..144e09df 100644 --- a/cpp_easygraph/cpp_easygraph.cpp +++ b/cpp_easygraph/cpp_easygraph.cpp @@ -75,16 +75,20 @@ PYBIND11_MODULE(cpp_easygraph, m) { .def_property("pred", &DiGraph::get_pred,nullptr) .def("generate_linkgraph", &DiGraph_generate_linkgraph,py::arg("weight") = "weight"); + m.def("cpp_degree_centrality", °ree_centrality, py::arg("G")); + m.def("cpp_in_degree_centrality", &in_degree_centrality, py::arg("G")); + m.def("cpp_out_degree_centrality", &out_degree_centrality, py::arg("G")); m.def("cpp_closeness_centrality", &closeness_centrality, py::arg("G"), py::arg("weight") = "weight", py::arg("cutoff") = py::none(), py::arg("sources") = py::none()); m.def("cpp_betweenness_centrality", &betweenness_centrality, py::arg("G"), py::arg("weight") = "weight", py::arg("cutoff") = py::none(),py::arg("sources") = py::none(), py::arg("normalized") = py::bool_(true), py::arg("endpoints") = py::bool_(false)); m.def("cpp_katz_centrality", &cpp_katz_centrality, py::arg("G"), py::arg("alpha") = 0.1, py::arg("beta") = 1.0, 
py::arg("max_iter") = 1000, py::arg("tol") = 1e-6, py::arg("normalized") = true); + m.def("cpp_eigenvector_centrality", &cpp_eigenvector_centrality, py::arg("G"), py::arg("max_iter") = 100, py::arg("tol") = 1.0e-6, py::arg("nstart") = py::none(), py::arg("weight") = "weight"); m.def("cpp_k_core", &core_decomposition, py::arg("G")); m.def("cpp_density", &density, py::arg("G")); m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_effective_size", &effective_size, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_efficiency", &efficiency, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); m.def("cpp_hierarchy", &hierarchy, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); - m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6); + m.def("cpp_pagerank", &_pagerank, py::arg("G"), py::arg("alpha") = 0.85, py::arg("max_iterator") = 500, py::arg("threshold") = 1e-6, py::arg("weight") = "weight"); m.def("cpp_dijkstra_multisource", &_dijkstra_multisource, py::arg("G"), py::arg("sources"), py::arg("weight") = "weight", py::arg("target") = py::none()); m.def("cpp_spfa", &_spfa, py::arg("G"), py::arg("source"), py::arg("weight") = "weight"); m.def("cpp_clustering", &clustering, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none()); diff --git a/cpp_easygraph/functions/centrality/betweenness.cpp b/cpp_easygraph/functions/centrality/betweenness.cpp index fae281b2..9bef3011 100644 --- a/cpp_easygraph/functions/centrality/betweenness.cpp +++ b/cpp_easygraph/functions/centrality/betweenness.cpp @@ -1,5 +1,14 @@ -#include "centrality.h" +#ifdef _OPENMP +#include +#endif +#include 
+#include +#include +#include +#include +#include +#include "centrality.h" #ifdef EASYGRAPH_ENABLE_GPU #include #endif @@ -7,77 +16,170 @@ #include "../../classes/graph.h" #include "../../common/utils.h" #include "../../classes/linkgraph.h" -#include "../../classes/segment_tree.cpp" +namespace py = pybind11; -void betweenness_dijkstra(const Graph_L& G_l, const int &S, std::vector& bc, double cutoff, Segment_tree_zkw& segment_tree_zkw, int endpoints_) { - const int dis_inf = 0x3f3f3f3f; +void betweenness_bfs_worker( + const Graph_L& G_l, const int& S, std::vector& bc, int cutoff, int endpoints_, + std::vector& q, std::vector& dis, std::vector& head_path, std::vector& St, + std::vector& count_path, std::vector& delta, std::vector& E_path, + std::vector& stamp, int& cur_stamp +) { int N = G_l.n; int edge_number_path = 0; - segment_tree_zkw.init(N); - std::vector dis(N+1, INT_MAX); - std::vector head_path(N+1, 0); - const std::vector& head = G_l.head; - const std::vector& E = G_l.edges; - int edges_num = E.size(); - std::vector St(N+1, 0); - std::vector count_path(N+1, 0); - std::vector delta(N+1, 0); - std::vector E_path(edges_num+1); - head_path[S] = 0; + int cnt_St = 0; + ++cur_stamp; + if ((int)q.size() < N + 1) + q.resize(N + 1); + int front = 0, back = 0; + int cutoff_int = (cutoff < 0) ? 
-1 : cutoff; + + stamp[S] = cur_stamp; dis[S] = 0; count_path[S] = 1; - segment_tree_zkw.change(S, 0); - int cnt_St = 0; + delta[S] = 0.0; + head_path[S] = 0; + q[back++] = S; - while(segment_tree_zkw.t[1] != dis_inf) { - int u = segment_tree_zkw.num[1]; - if(u==0) break; - segment_tree_zkw.change(u, dis_inf); - if (cutoff >= 0 && dis[u] > cutoff){ - continue; - } + const std::vector& head = G_l.head; + const std::vector& E = G_l.edges; + + while (front < back) { + int u = q[front++]; + int du = dis[u]; + if (cutoff_int >= 0 && du > cutoff_int) + break; St[cnt_St++] = u; - for(int p = head[u]; p != -1; p = E[p].next) { + + for (int p = head[u]; p != -1; p = E[p].next) { int v = E[p].to; - if(cutoff >= 0 && (dis[u] + E[p].w) > cutoff){ + int new_dis = du + 1; + if (cutoff_int >= 0 && new_dis > cutoff_int) continue; - } - if (dis[v] > dis[u] + E[p].w) { - dis[v] = dis[u] + E[p].w; - segment_tree_zkw.change(v, dis[v]); + + if (stamp[v] != cur_stamp) { + stamp[v] = cur_stamp; + dis[v] = new_dis; count_path[v] = count_path[u]; + delta[v] = 0.0; head_path[v] = 0; + q[back++] = v; E_path[++edge_number_path].next = head_path[v]; E_path[edge_number_path].to = u; head_path[v] = edge_number_path; - } - else if (dis[v] == dis[u] + E[p].w) { + } else if (dis[v] == new_dis) { count_path[v] += count_path[u]; E_path[++edge_number_path].next = head_path[v]; E_path[edge_number_path].to = u; head_path[v] = edge_number_path; - } } } - if (endpoints_) { + + if (endpoints_) bc[S] += cnt_St - 1; - } + while (cnt_St > 0) { int u = St[--cnt_St]; - float coeff = (1.0 + delta[u]) / count_path[u]; - for(int p = head_path[u]; p; p = E_path[p].next){ - delta[E_path[p].to] += count_path[E_path[p].to] * coeff; + double cu = count_path[u]; + if (cu != 0) { + double coeff = (1.0 + delta[u]) / cu; + for (int p = head_path[u]; p; p = E_path[p].next) { + int w = E_path[p].to; + delta[w] += count_path[w] * coeff; + } } - if (u != S) bc[u] += delta[u] + endpoints_; } - } +void 
betweenness_dijkstra_worker( + const Graph_L& G_l, const int& S, std::vector& bc, double cutoff, + std::vector& dis, std::vector& head_path, + std::vector& St, std::vector& count_path, std::vector& delta, + std::vector& E_path, int endpoints_, + std::vector& stamp, int& cur_stamp +) { + const int dis_inf = 0x3f3f3f3f; + + int N = G_l.n; + int edge_number_path = 0; + int cnt_St = 0; + ++cur_stamp; + + stamp[S] = cur_stamp; + dis[S] = 0; + count_path[S] = 1; + delta[S] = 0.0; + head_path[S] = 0; + + std::priority_queue, std::vector>, std::greater>> pq; + pq.push({0, S}); + + const std::vector& head = G_l.head; + const std::vector& E = G_l.edges; + while (!pq.empty()) { + std::pair top = pq.top(); + pq.pop(); + int d = top.first; + int u = top.second; + + if (d > dis[u]) continue; + + if (cutoff >= 0 && d > cutoff) continue; + + St[cnt_St++] = u; + + for (int p = head[u]; p != -1; p = E[p].next) { + int v = E[p].to; + int w = E[p].w; + int nd = dis[u] + w; + + if (cutoff >= 0 && nd > cutoff) continue; + + bool first_visit = (stamp[v] != cur_stamp); + + if (first_visit || dis[v] > nd) { + if (first_visit) { + stamp[v] = cur_stamp; + delta[v] = 0.0; + } + dis[v] = nd; + count_path[v] = count_path[u]; + head_path[v] = 0; + E_path[++edge_number_path].next = head_path[v]; + E_path[edge_number_path].to = u; + head_path[v] = edge_number_path; + + pq.push({nd, v}); + } else if (dis[v] == nd) { + count_path[v] += count_path[u]; + E_path[++edge_number_path].next = head_path[v]; + E_path[edge_number_path].to = u; + head_path[v] = edge_number_path; + } + } + } + + if (endpoints_) + bc[S] += cnt_St - 1; + + while (cnt_St > 0) { + int u = St[--cnt_St]; + double cu = count_path[u]; + if (cu != 0) { + double coeff = (1.0 + delta[u]) / cu; + for (int p = head_path[u]; p; p = E_path[p].next) { + int w = E_path[p].to; + delta[w] += count_path[w] * coeff; + } + } + if (u != S) + bc[u] += delta[u] + endpoints_; + } +} static double calc_scale(int len_V, int is_directed, int normalized, 
int endpoints) { double scale = 1.0; @@ -88,10 +190,12 @@ static double calc_scale(int len_V, int is_directed, int normalized, int endpoin } else { scale = 1.0 / (double(len_V) * (len_V - 1)); } - } else if (len_V <= 2) { - scale = 1.0; } else { - scale = 1.0 / ((double(len_V) - 1) * (len_V - 2)); + if (len_V <= 2) { + scale = 1.0; + } else { + scale = 1.0 / ((double(len_V) - 1) * (len_V - 2)); + } } } else { if (!is_directed) { @@ -103,14 +207,13 @@ static double calc_scale(int len_V, int is_directed, int normalized, int endpoin return scale; } - - -static py::object invoke_cpp_betweenness_centrality(py::object G, py::object weight, - py::object cutoff, py::object sources, - py::object normalized, py::object endpoints){ +static py::object invoke_cpp_betweenness_centrality( + py::object G, py::object weight, py::object cutoff, py::object sources, + py::object normalized, py::object endpoints +) { Graph& G_ = G.cast(); int cutoff_ = -1; - if (!cutoff.is_none()){ + if (!cutoff.is_none()) { cutoff_ = cutoff.cast(); } int N = G_.node.size(); @@ -118,53 +221,149 @@ static py::object invoke_cpp_betweenness_centrality(py::object G, py::object wei int normalized_ = normalized.cast(); int endpoints_ = endpoints.cast(); double scale = calc_scale(N, is_directed, normalized_, endpoints_); - std::string weight_key = weight_to_string(weight); + bool use_weights = !weight.is_none(); + std::string weight_key = ""; + if (use_weights) { + weight_key = weight_to_string(weight); + } + Graph_L G_l; - if(G_.linkgraph_dirty){ + if (G_.linkgraph_dirty) { G_l = graph_to_linkgraph(G_, is_directed, weight_key, false, false); - G_.linkgraph_structure=G_l; - G_.linkgraph_dirty = false; - } - else{ + G_.linkgraph_structure = G_l; + } else { G_l = G_.linkgraph_structure; } - Segment_tree_zkw segment_tree_zkw(N); - std::vector bc(N+1, 0); + + int edges_num = G_l.edges.size(); + std::vector bc(N + 1, 0.0); std::vector BC; - if(!sources.is_none()){ + int num_threads = 1; +#ifdef _OPENMP + 
num_threads = omp_get_max_threads(); +#endif + + std::vector> dis_all(num_threads, std::vector(N + 1)); + std::vector> head_path_all(num_threads, std::vector(N + 1)); + std::vector> St_all(num_threads, std::vector(N + 1)); + std::vector> count_path_all(num_threads, std::vector(N + 1)); + std::vector> delta_all(num_threads, std::vector(N + 1)); + std::vector> E_path_all(num_threads, std::vector(edges_num + 1)); + + std::vector> queue_all(num_threads, std::vector(N + 1)); + std::vector> stamp_all(num_threads, std::vector(N + 1, 0)); + std::vector cur_stamp_all(num_threads, 0); + + std::vector> bc_local_all(num_threads, std::vector(N + 1, 0.0)); + + if (!sources.is_none()) { py::list sources_list = py::list(sources); int sources_list_len = py::len(sources_list); - for(int i = 0; i < sources_list_len; i++){ - if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){ + std::vector sources_vec; + sources_vec.reserve(sources_list_len); + for (int i = 0; i < sources_list_len; i++) { + if (G_.node_to_id.attr("get")(sources_list[i], py::none()).is_none()) { printf("The node should exist in the graph!"); return py::none(); } - node_t source_id = G_.node_to_id.attr("get")(sources_list[i]).cast(); - betweenness_dijkstra(G_l, source_id, bc, cutoff_, segment_tree_zkw, endpoints_); + sources_vec.push_back(G_.node_to_id.attr("get")(sources_list[i]).cast()); } - for(int i = 1; i <= N; i++){ - BC.push_back(scale * bc[i]); - } - } - else{ - for (int i = 1; i <= N; ++i){ - betweenness_dijkstra(G_l, i, bc, cutoff_,segment_tree_zkw, endpoints_); + +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < sources_list_len; i++) { + node_t source_id = sources_vec[i]; +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + auto& bc_local = bc_local_all[tid]; + auto& dis = dis_all[tid]; + auto& head_path = head_path_all[tid]; + auto& St = St_all[tid]; + auto& count_path = count_path_all[tid]; + auto& delta = delta_all[tid]; 
+ auto& E_path = E_path_all[tid]; + auto& q = queue_all[tid]; + auto& stamp = stamp_all[tid]; + int& cur_stamp = cur_stamp_all[tid]; + + if (use_weights) { + betweenness_dijkstra_worker( + G_l, source_id, bc_local, cutoff_, dis, head_path, + St, count_path, delta, E_path, endpoints_, stamp, cur_stamp + ); + } else { + betweenness_bfs_worker( + G_l, source_id, bc_local, cutoff_, endpoints_, q, dis, head_path, + St, count_path, delta, E_path, stamp, cur_stamp + ); + } } - for(int i = 1; i <= N; i++){ - BC.push_back(scale * bc[i]); + } else { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 1; i <= N; ++i) { +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + auto& bc_local = bc_local_all[tid]; + auto& dis = dis_all[tid]; + auto& head_path = head_path_all[tid]; + auto& St = St_all[tid]; + auto& count_path = count_path_all[tid]; + auto& delta = delta_all[tid]; + auto& E_path = E_path_all[tid]; + auto& q = queue_all[tid]; + auto& stamp = stamp_all[tid]; + int& cur_stamp = cur_stamp_all[tid]; + + if (use_weights) { + betweenness_dijkstra_worker( + G_l, i, bc_local, cutoff_, dis, head_path, + St, count_path, delta, E_path, endpoints_, stamp, cur_stamp + ); + } else { + betweenness_bfs_worker( + G_l, i, bc_local, cutoff_, endpoints_, q, dis, head_path, + St, count_path, delta, E_path, stamp, cur_stamp + ); + } } } +#ifdef _OPENMP +#pragma omp parallel for schedule(static) + for (int j = 1; j <= N; ++j) { + double s = 0.0; + for (int tid = 0; tid < num_threads; ++tid) + s += bc_local_all[tid][j]; + bc[j] += s; + } +#else + for (int j = 1; j <= N; ++j) { + bc[j] += bc_local_all[0][j]; + } +#endif + + BC.reserve(N); + for (int i = 1; i <= N; i++) { + BC.push_back(scale * bc[i]); + } + py::array::ShapeContainer ret_shape{(int)BC.size()}; py::array_t ret(ret_shape, BC.data()); - return ret; } - #ifdef EASYGRAPH_ENABLE_GPU static py::object invoke_gpu_betweenness_centrality(py::object G, py::object weight, - 
py::object py_sources, py::object normalized, py::object endpoints) { +py::object py_sources, py::object normalized, py::object endpoints) { Graph& G_ = G.cast(); if (weight.is_none()) { G_.gen_CSR(); @@ -175,29 +374,26 @@ static py::object invoke_gpu_betweenness_centrality(py::object G, py::object wei std::vector& E = csr_graph->E; std::vector& V = csr_graph->V; std::vector *W_p = weight.is_none() ? &(csr_graph->unweighted_W) - : csr_graph->W_map.find(weight_to_string(weight))->second.get(); + : csr_graph->W_map.find(weight_to_string(weight))->second.get(); auto sources = G_.gen_CSR_sources(py_sources); std::vector BC; bool is_directed = G.attr("is_directed")().cast(); int gpu_r = gpu_easygraph::betweenness_centrality(V, E, *W_p, *sources, - is_directed, normalized.cast(), - endpoints.cast(), BC); - + is_directed, normalized.cast(), + endpoints.cast(), BC); if (gpu_r != gpu_easygraph::EG_GPU_SUCC) { // the code below will throw an exception py::pybind11_fail(gpu_easygraph::err_code_detail(gpu_r)); } - py::array::ShapeContainer ret_shape{(int)BC.size()}; py::array_t ret(ret_shape, BC.data()); - return ret; } #endif py::object betweenness_centrality(py::object G, py::object weight, py::object cutoff, py::object sources, - py::object normalized, py::object endpoints) { +py::object normalized, py::object endpoints) { #ifdef EASYGRAPH_ENABLE_GPU return invoke_gpu_betweenness_centrality(G, weight, sources, normalized, endpoints); #else @@ -212,7 +408,6 @@ py::object betweenness_centrality(py::object G, py::object weight, py::object cu // std::vector dis(N+1, INFINITY); // std::vector vis(N+1, false); // std::vector head_path(N+1, 0); - // const std::vector& head = G_l.head; // const std::vector& E = G_l.edges; // int edges_num = E.size(); @@ -220,10 +415,11 @@ py::object betweenness_centrality(py::object G, py::object weight, py::object cu // std::vector count_path(N+1, 0); // std::vector delta(N+1, 0); // std::vector E_path(edges_num+1); - // head_path[S] = 0; // 
dis[S] = 0; // count_path[S] = 1; +// dis[S] = 0; +// count_path[S] = 1; // q.push(compare_node(S, 0)); // int cnt_St = 0; // while(!q.empty()) { @@ -250,14 +446,12 @@ py::object betweenness_centrality(py::object G, py::object weight, py::object cu // E_path[++edge_number_path].next = head_path[v]; // E_path[edge_number_path].to = u; // head_path[v] = edge_number_path; - // } // else if (dis[v] == dis[u] + E[p].w) { // count_path[v] += count_path[u]; // E_path[++edge_number_path].next = head_path[v]; // E_path[edge_number_path].to = u; // head_path[v] = edge_number_path; - // } // } // } @@ -267,9 +461,7 @@ py::object betweenness_centrality(py::object G, py::object weight, py::object cu // for(int p = head_path[u]; p; p = E_path[p].next){ // delta[E_path[p].to] += count_path[E_path[p].to] * coeff; // } - // if (u != S) // bc[u] += delta[u]; // } // } - diff --git a/cpp_easygraph/functions/centrality/centrality.h b/cpp_easygraph/functions/centrality/centrality.h index 7040618a..6a204cc1 100644 --- a/cpp_easygraph/functions/centrality/centrality.h +++ b/cpp_easygraph/functions/centrality/centrality.h @@ -12,4 +12,16 @@ py::object cpp_katz_centrality( py::object py_max_iter, py::object py_tol, py::object py_normalized +); + +py::object degree_centrality(py::object G); +py::object in_degree_centrality(py::object G); +py::object out_degree_centrality(py::object G); + +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight ); \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/closeness.cpp b/cpp_easygraph/functions/centrality/closeness.cpp index 524532fe..63eea703 100644 --- a/cpp_easygraph/functions/centrality/closeness.cpp +++ b/cpp_easygraph/functions/centrality/closeness.cpp @@ -7,44 +7,186 @@ #include "../../classes/graph.h" #include "../../common/utils.h" #include "../../classes/linkgraph.h" -#include "../../classes/segment_tree.cpp" -double 
closeness_dijkstra(const Graph_L& G_l, const int &S, int cutoff, Segment_tree_zkw& segment_tree_zkw){ - const int dis_inf = 0x3f3f3f3f; +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +// Heap node: use negative value + max heap to implement min heap +typedef std::pair HeapNode; + +// Optimized adjacency list cache +struct FastAdjCache { + std::vector neighbor_ptrs; + std::vector neighbor_counts; + std::vector weight_ptrs; + std::vector> neighbor_storage; + std::vector> weight_storage; + + void init(int N) { + neighbor_ptrs.resize(N + 1, nullptr); + neighbor_counts.resize(N + 1, 0); + neighbor_storage.resize(N + 1); + } + + void init_with_weights(int N) { + init(N); + weight_ptrs.resize(N + 1, nullptr); + weight_storage.resize(N + 1); + } + + inline void build_if_needed(int u, const std::vector& head, + const std::vector& edges, bool store_weights) { + if (neighbor_ptrs[u] != nullptr) return; + + std::vector& neis = neighbor_storage[u]; + for (int p = head[u]; p != -1; p = edges[p].next) { + neis.push_back(edges[p].to); + if (store_weights) { + weight_storage[u].push_back(edges[p].w); + } + } + + neighbor_counts[u] = neis.size(); + neighbor_ptrs[u] = neis.data(); + if (store_weights) { + weight_ptrs[u] = weight_storage[u].data(); + } + } + + inline int* get_neighbors_ptr(int u) const { return neighbor_ptrs[u]; } + inline int get_neighbor_count(int u) const { return neighbor_counts[u]; } + inline float* get_weights_ptr(int u) const { return weight_ptrs[u]; } +}; + +// BFS implementation - directly use raw adjacency list +double closeness_bfs_direct(const Graph_L& G_l, const int &S, int cutoff, + std::vector& already_counted, + std::vector& queue_storage, + int timestamp) { int N = G_l.n; - segment_tree_zkw.init(N); - std::vector dis(N+1, INT_MAX); const std::vector& E = G_l.edges; const std::vector& head = G_l.head; - int number_connected = 0; + + int nodes_reached = 0; long long sum_dis = 0; - dis[S] = 0; - segment_tree_zkw.change(S, 
0); - while(segment_tree_zkw.t[1] != dis_inf) { - int u = segment_tree_zkw.num[1]; - if(u == 0) break; - segment_tree_zkw.change(u, dis_inf); - if (cutoff >= 0 && dis[u] > cutoff){ + + queue_storage.clear(); + + int queue_front = 0; + already_counted[S] = timestamp; + queue_storage.push_back(S); + queue_storage.push_back(0); + + while (queue_front < static_cast(queue_storage.size())) { + int u = queue_storage[queue_front++]; + int actdist = queue_storage[queue_front++]; + + if (cutoff >= 0 && actdist > cutoff) { continue; } - number_connected += 1; - sum_dis += dis[u]; - for(int p = head[u]; p != -1; p = E[p].next) { + + sum_dis += actdist; + nodes_reached++; + + for (int p = head[u]; p != -1; p = E[p].next) { int v = E[p].to; - if(cutoff >= 0 && (dis[u] + E[p].w) > cutoff){ + + if (already_counted[v] == timestamp) { continue; } - if (dis[v] > dis[u] + E[p].w) { - dis[v] = dis[u] + E[p].w; - segment_tree_zkw.change(v, dis[v]); - } + + already_counted[v] = timestamp; + queue_storage.push_back(v); + queue_storage.push_back(actdist + 1); } } - if (number_connected == 1) + + if (nodes_reached == 1) return 0.0; else - return 1.0 * (number_connected - 1) * (number_connected - 1) / ((N - 1) * sum_dis); + return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis); +} + +// Check if the graph is unweighted +inline bool is_unweighted_graph(const Graph_L& G_l) { + const std::vector& E = G_l.edges; + for (const auto& edge : E) { + if (std::abs(edge.w - 1.0f) > 1e-9) { + return false; + } + } + return true; +} +// Dijkstra implementation - use on-demand adjacency cache +double closeness_dijkstra_cached(const Graph_L& G_l, const int &S, int cutoff, + std::vector& dist, + std::vector& which, + FastAdjCache& cache, + int timestamp) { + int N = G_l.n; + const std::vector& E = G_l.edges; + const std::vector& head = G_l.head; + + int nodes_reached = 0; + double sum_dis = 0.0; + + std::priority_queue heap; + + dist[S] = 1.0f; + which[S] = timestamp; + 
heap.push({-1.0f, S}); + + while (!heap.empty()) { + HeapNode top = heap.top(); + heap.pop(); + float mindist = -top.first; + int minnei = top.second; + + if (mindist > dist[minnei]) { + continue; + } + + float actual_dist = mindist - 1.0f; + if (cutoff >= 0 && actual_dist > cutoff) { + continue; + } + + sum_dis += actual_dist; + nodes_reached++; + + cache.build_if_needed(minnei, head, E, true); + + int* neis = cache.get_neighbors_ptr(minnei); + float* ws = cache.get_weights_ptr(minnei); + int nlen = cache.get_neighbor_count(minnei); + + for (int j = 0; j < nlen; j++) { + int to = neis[j]; + float altdist = mindist + ws[j]; + float curdist = dist[to]; + + if (which[to] != timestamp) { + which[to] = timestamp; + dist[to] = altdist; + heap.push({-altdist, to}); + } else if (curdist == 0.0f || altdist < curdist) { + dist[to] = altdist; + heap.push({-altdist, to}); + } + } + } + + if (nodes_reached == 1) + return 0.0; + else + return 1.0 * (nodes_reached - 1) * (nodes_reached - 1) / ((N - 1) * sum_dis); } static py::object invoke_cpp_closeness_centrality(py::object G, py::object weight, @@ -58,27 +200,115 @@ static py::object invoke_cpp_closeness_centrality(py::object G, py::object weigh if (!cutoff.is_none()){ cutoff_ = cutoff.cast(); } - Segment_tree_zkw segment_tree_zkw(N); + + // Auto algorithm selection + bool use_bfs = (weight.is_none() || is_unweighted_graph(G_l)); + std::vector CC; + if(!sources.is_none()){ py::list sources_list = py::list(sources); int sources_list_len = py::len(sources_list); + CC.resize(sources_list_len); + + // Collect all source node IDs + std::vector source_ids(sources_list_len); for(int i = 0; i < sources_list_len; i++){ if(G_.node_to_id.attr("get")(sources_list[i],py::none()).is_none()){ printf("The node should exist in the graph!"); return py::none(); } - node_t source_id = G_.node_to_id.attr("get")(sources_list[i]).cast(); - float res = closeness_dijkstra(G_l, source_id, cutoff_,segment_tree_zkw); - CC.push_back(res); + source_ids[i] 
= G_.node_to_id.attr("get")(sources_list[i]).cast(); + } + + // OpenMP parallel computation + // Only enable parallelism when sources are many to avoid overhead + #pragma omp parallel if(sources_list_len > 100) + { + // Per-thread data structures (avoid race conditions) + std::vector already_counted(N + 1, 0); + std::vector queue_storage; + queue_storage.reserve(N * 2); + + std::vector dist(N + 1, 0.0f); + std::vector which(N + 1, 0); + + FastAdjCache cache; + if (!use_bfs) { + cache.init_with_weights(N); + } + + // Assign unique timestamp start for each thread + #ifdef _OPENMP + int thread_id = omp_get_thread_num(); + int num_threads = omp_get_num_threads(); + int timestamp = thread_id * sources_list_len; + #else + int timestamp = 0; + #endif + + // Parallel loop: each thread handles different source node + #pragma omp for schedule(dynamic, 1) + for(int i = 0; i < sources_list_len; i++){ + timestamp++; + double res; + if (use_bfs) { + res = closeness_bfs_direct(G_l, source_ids[i], cutoff_, + already_counted, queue_storage, timestamp); + } else { + res = closeness_dijkstra_cached(G_l, source_ids[i], cutoff_, + dist, which, cache, timestamp); + } + CC[i] = res; + } } } else{ - for(int i = 1; i <= N; i++){ - float res = closeness_dijkstra(G_l, i, cutoff_,segment_tree_zkw); - CC.push_back(res); + CC.resize(N); + + // OpenMP parallel computation for all nodes + // Only enable parallelism when node count is large + #pragma omp parallel if(N > 100) + { + // Per-thread data structures + std::vector already_counted(N + 1, 0); + std::vector queue_storage; + queue_storage.reserve(N * 2); + + std::vector dist(N + 1, 0.0f); + std::vector which(N + 1, 0); + + FastAdjCache cache; + if (!use_bfs) { + cache.init_with_weights(N); + } + + // Assign unique timestamp start for each thread + #ifdef _OPENMP + int thread_id = omp_get_thread_num(); + int num_threads = omp_get_num_threads(); + int timestamp = thread_id * N; + #else + int timestamp = 0; + #endif + + // Parallel loop: 
dynamic scheduling for load balancing + #pragma omp for schedule(dynamic, 10) + for(int i = 1; i <= N; i++){ + timestamp++; + double res; + if (use_bfs) { + res = closeness_bfs_direct(G_l, i, cutoff_, + already_counted, queue_storage, timestamp); + } else { + res = closeness_dijkstra_cached(G_l, i, cutoff_, + dist, which, cache, timestamp); + } + CC[i - 1] = res; + } } } + py::array::ShapeContainer ret_shape{(int)CC.size()}; py::array_t ret(ret_shape, CC.data()); @@ -120,4 +350,4 @@ py::object closeness_centrality(py::object G, py::object weight, py::object cuto #else return invoke_cpp_closeness_centrality(G, weight, cutoff, sources); #endif -} +} \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/degree.cpp b/cpp_easygraph/functions/centrality/degree.cpp new file mode 100644 index 00000000..83a8fdcd --- /dev/null +++ b/cpp_easygraph/functions/centrality/degree.cpp @@ -0,0 +1,96 @@ +#include "centrality.h" + +#include "../../classes/graph.h" +#include "../../classes/directed_graph.h" +#include "../../common/utils.h" +#include "../../classes/linkgraph.h" + +namespace py = pybind11; + +py::object degree_centrality( + py::object G +) { + Graph* graph = G.cast(); + py::dict centrality_map = py::dict(); + py::object nodes = graph->get_nodes(); + int n = py::len(nodes); + if (n <= 1) { + for (const auto& node_handle : nodes) { + centrality_map[node_handle] = 0.0; + } + return centrality_map; + } + + double scale = 1.0 / (n - 1); + + std::string class_name = G.attr("__class__").attr("__name__").cast(); + + if (class_name == "DiGraphC") { + // 有向图 (DiGraph) + DiGraph* digraph = G.cast(); + py::object adj = digraph->get_adj(); + py::object pred = digraph->get_pred(); + + for (const auto& node_handle : nodes) { + int out_deg = py::len(adj[node_handle]); + int in_deg = py::len(pred[node_handle]); + centrality_map[node_handle] = (double)(out_deg + in_deg) * scale; + } + } else { + py::object adj = graph->get_adj(); + for (const auto& node_handle : 
nodes) { + int degree = py::len(adj[node_handle]); + centrality_map[node_handle] = (double)degree * scale; + } + } + return centrality_map; +} + + +py::object in_degree_centrality( + py::object G +) { + DiGraph* graph = G.cast(); + py::dict centrality_map = py::dict(); + + py::object nodes = graph->get_nodes(); + int n = py::len(nodes); + + if (n <= 1) { + return centrality_map; + } + + double scale = 1.0 / (n - 1); + + py::object pred = graph->get_pred(); + + for (const auto& node_handle : nodes) { + int in_degree = py::len(pred[node_handle]); + centrality_map[node_handle] = in_degree * scale; + } + return centrality_map; +} + + +py::object out_degree_centrality( + py::object G +) { + Graph* graph = G.cast(); + py::dict centrality_map = py::dict(); + + py::object nodes = graph->get_nodes(); + int n = py::len(nodes); + + if (n <= 1) { + return centrality_map; + } + double scale = 1.0 / (n - 1); + + py::object adj = graph->get_adj(); + + for (const auto& node_handle : nodes) { + int out_degree = py::len(adj[node_handle]); + centrality_map[node_handle] = out_degree * scale; + } + return centrality_map; +} \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/eigenvector.cpp b/cpp_easygraph/functions/centrality/eigenvector.cpp new file mode 100644 index 00000000..a5f61729 --- /dev/null +++ b/cpp_easygraph/functions/centrality/eigenvector.cpp @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include "../../classes/graph.h" +#include "../../common/utils.h" + +namespace py = pybind11; + +class CSRMatrix { +public: + std::vector indptr; + std::vector indices; + std::vector data; // Empty if unweighted (all 1.0) + int rows, cols; + bool is_weighted; + + CSRMatrix(int r, int c) : rows(r), cols(c), is_weighted(false) { + indptr.assign(r + 1, 0); + } +}; + +// Power iteration with branch optimization for weighted/unweighted paths +std::vector power_iteration_optimized( + const 
CSRMatrix& A, + int max_iter, + double tol, + std::vector& x +) { + const int n = A.rows; + std::vector x_next(n); + bool use_weight = A.is_weighted && !A.data.empty(); + + // Initial normalization + double norm = 0.0; + #pragma omp parallel for reduction(+:norm) + for (int i = 0; i < n; ++i) norm += x[i] * x[i]; + norm = std::sqrt(norm); + + if (norm < 1e-12) { + std::fill(x.begin(), x.end(), 1.0 / std::sqrt(n)); + } else { + double inv_norm = 1.0 / norm; + #pragma omp parallel for + for (int i = 0; i < n; ++i) x[i] *= inv_norm; + } + + double delta = tol + 1.0; + for (int iter = 0; iter < max_iter && delta >= tol; ++iter) { + double next_norm_sq = 0.0; + + #pragma omp parallel for reduction(+:next_norm_sq) schedule(dynamic, 64) + for (int i = 0; i < n; ++i) { + double sum = 0.0; + const int start = A.indptr[i]; + const int end = A.indptr[i+1]; + + if (use_weight) { + for (int j = start; j < end; ++j) { + sum += A.data[j] * x[A.indices[j]]; + } + } else { + for (int j = start; j < end; ++j) { + sum += x[A.indices[j]]; + } + } + + x_next[i] = sum; + next_norm_sq += sum * sum; + } + + double next_norm = std::sqrt(next_norm_sq); + if (next_norm < 1e-12) break; + + double inv_next_norm = 1.0 / next_norm; + delta = 0.0; + + #pragma omp parallel for reduction(+:delta) schedule(static) + for (int i = 0; i < n; ++i) { + double val = x_next[i] * inv_next_norm; + delta += std::abs(val - x[i]); + x_next[i] = val; + } + x.swap(x_next); + } + return x; +} + +// Build transpose CSR with fallback logic for missing weight keys +CSRMatrix build_transpose_matrix_smart(Graph& graph, const std::vector& nodes, const std::string& weight_key) { + std::shared_ptr csr_ptr = weight_key.empty() ? 
graph.gen_CSR() : graph.gen_CSR(weight_key); + + int n = static_cast(nodes.size()); + CSRMatrix At(n, n); + if (!csr_ptr) return At; + + const auto& src_indptr = csr_ptr->V; + const auto& src_indices = csr_ptr->E; + std::vector src_data; + bool actually_weighted = false; + + // Detect if weighted calculation is required + if (!weight_key.empty()) { + auto it = csr_ptr->W_map.find(weight_key); + if (it != csr_ptr->W_map.end() && it->second) { + src_data = *(it->second); + for (double w : src_data) { + if (std::abs(w - 1.0) > 1e-9) { + actually_weighted = true; + break; + } + } + } + } + + At.is_weighted = actually_weighted; + + // Calculate row counts for transpose + for (int x_idx : src_indices) { + if (x_idx >= 0 && x_idx < n) At.indptr[x_idx + 1]++; + } + for (int i = 0; i < n; ++i) At.indptr[i + 1] += At.indptr[i]; + + At.indices.resize(src_indices.size()); + if (actually_weighted) At.data.resize(src_indices.size()); + + std::vector cur_pos(At.indptr.begin(), At.indptr.end()); + + // Populate transpose CSR data + for (int r = 0; r < n; ++r) { + for (int p = src_indptr[r]; p < src_indptr[r+1]; ++p) { + int c = src_indices[p]; + if (c < 0 || c >= n) continue; + int dest = cur_pos[c]++; + At.indices[dest] = r; + if (actually_weighted) At.data[dest] = src_data[p]; + } + } + return At; +} + +py::object cpp_eigenvector_centrality( + py::object G, + py::object py_max_iter, + py::object py_tol, + py::object py_nstart, + py::object py_weight +) { + try { + Graph& graph = G.cast(); + int max_iter = py_max_iter.cast(); + double tol = py_tol.cast(); + std::string weight_key = py_weight.is_none() ? 
"" : py_weight.cast(); + + if (graph.node.empty()) return py::dict(); + + std::vector nodes; + for (auto& pair : graph.node) nodes.push_back(pair.first); + int n = nodes.size(); + + CSRMatrix A_transpose = build_transpose_matrix_smart(graph, nodes, weight_key); + + // Initialize x vector (prefer degree-based or uniform) + std::vector x(n, 1.0 / n); + if (!py_nstart.is_none()) { + py::dict nstart = py_nstart.cast(); + for (int i = 0; i < n; i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + if (nstart.contains(node_obj)) x[i] = nstart[node_obj].cast(); + } + } else { + for (int i = 0; i < n; i++) { + int degree = A_transpose.indptr[i+1] - A_transpose.indptr[i]; + x[i] = (degree > 0) ? (double)degree : 1.0/n; + } + } + + std::vector res = power_iteration_optimized(A_transpose, max_iter, tol, x); + + py::dict result; + for (int i = 0; i < n; i++) { + py::object node_obj = graph.id_to_node[py::cast(nodes[i])]; + result[node_obj] = res[i]; + } + return result; + + } catch (const std::exception& e) { + throw std::runtime_error(std::string("C++ Eigenvector Error: ") + e.what()); + } +} \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/katz_centrality.cpp b/cpp_easygraph/functions/centrality/katz_centrality.cpp index 63e78485..367d4397 100644 --- a/cpp_easygraph/functions/centrality/katz_centrality.cpp +++ b/cpp_easygraph/functions/centrality/katz_centrality.cpp @@ -1,120 +1,194 @@ -#include +#ifdef _OPENMP +#include +#endif #include +#include +#include +#include #include #include + #include "centrality.h" #include "../../classes/graph.h" namespace py = pybind11; -py::object cpp_katz_centrality( - py::object G, - py::object py_alpha, - py::object py_beta, - py::object py_max_iter, - py::object py_tol, - py::object py_normalized -) { - try { - Graph& graph = G.cast(); - auto csr = graph.gen_CSR(); - int n = csr->nodes.size(); - - if (n == 0) { - return py::dict(); - } +class CSRMatrix { +public: + std::vector indptr; // size 
rows+1 + std::vector indices; // size nnz + std::vector data; // size nnz + int rows = 0; + int cols = 0; - // Initialize vectors - std::vector x0(n, 1.0); - std::vector x1(n); - std::vector* x_prev = &x0; - std::vector* x_next = &x1; - - // Process beta parameter - std::vector b(n); - if (py::isinstance(py_beta) || py::isinstance(py_beta)) { - double beta_val = py_beta.cast(); - for (int i = 0; i < n; i++) { - b[i] = beta_val; - } - } else if (py::isinstance(py_beta)) { - py::dict beta_dict = py_beta.cast(); - for (int i = 0; i < n; i++) { - node_t internal_id = csr->nodes[i]; - py::object node_obj = graph.id_to_node[py::cast(internal_id)]; - if (beta_dict.contains(node_obj)) { - b[i] = beta_dict[node_obj].cast(); - } else { - b[i] = 1.0; - } - } - } else { - throw py::type_error("beta must be a float or a dict"); + CSRMatrix() = default; + CSRMatrix(int r, int c) : rows(r), cols(c) { + indptr.assign(r + 1, 0); + } +}; + +// Build transpose CSR from EasyGraph CSR so that row i contains in-neighbors of i. +static CSRMatrix build_transpose_matrix_from_csr(const std::shared_ptr& csr_ptr) { + if (!csr_ptr) return CSRMatrix(); + + const int n = static_cast(csr_ptr->nodes.size()); + if (n == 0) return CSRMatrix(0, 0); + + const auto& src_indptr = csr_ptr->V; + const auto& src_indices = csr_ptr->E; + + // Unweighted: all ones. + std::vector src_data(src_indices.size(), 1.0); + + CSRMatrix At(n, n); + + // Count nnz per column in the source (becomes nnz per row in transpose). + for (int c : src_indices) { + if (c >= 0 && c < n) At.indptr[c + 1]++; + } + + // Prefix sum. + for (int i = 0; i < n; ++i) { + At.indptr[i + 1] += At.indptr[i]; + } + + const int nnz = static_cast(src_indices.size()); + At.indices.resize(nnz); + At.data.resize(nnz); + + std::vector cur_pos(At.indptr.begin(), At.indptr.end()); + + // Fill transpose. 
+ for (int r = 0; r < n; ++r) { + const int start = src_indptr[r]; + const int end = src_indptr[r + 1]; + for (int p = start; p < end; ++p) { + const int c = src_indices[p]; + if (c < 0 || c >= n) continue; + const int dest = cur_pos[c]++; + At.indices[dest] = r; + At.data[dest] = src_data[p]; } + } - // Extract parameters - double alpha = py_alpha.cast(); - int max_iter = py_max_iter.cast(); - double tol = py_tol.cast(); - bool normalized = py_normalized.cast(); - - // Iterative updates - int iter = 0; - for (; iter < max_iter; iter++) { - for (int i = 0; i < n; i++) { - double sum = 0.0; - int start = csr->V[i]; - int end = csr->V[i + 1]; - for (int jj = start; jj < end; jj++) { - int j = csr->E[jj]; - sum += (*x_prev)[j]; - } - (*x_next)[i] = alpha * sum + b[i]; - } + return At; +} - // Check convergence - double change = 0.0; - for (int i = 0; i < n; i++) { - change += std::abs((*x_next)[i] - (*x_prev)[i]); - } +static std::vector katz_centrality_omp(const CSRMatrix& A, + double alpha, + const std::vector& beta, + int max_iters, + double tol, + bool normalize) { + const int n = A.rows; + std::vector x(n, 1.0); // initial guess + std::vector x_next(n, 0.0); // next iterate + if (n == 0) return x; - if (change < tol) { - break; + for (int iter = 0; iter < max_iters; ++iter) { + double err_sq = 0.0; + double norm_sq = 0.0; + + // SpMV + Katz update + error and norm in ONE pass + #pragma omp parallel for reduction(+ : err_sq, norm_sq) schedule(static) + for (int i = 0; i < n; ++i) { + double sum = 0.0; + const int row_start = A.indptr[i]; + const int row_end = A.indptr[i + 1]; + + for (int e = row_start; e < row_end; ++e) { + sum += A.data[e] * x[A.indices[e]]; } - std::swap(x_prev, x_next); + const double new_val = alpha * sum + beta[i]; + const double diff = new_val - x[i]; + + x_next[i] = new_val; + err_sq += diff * diff; + norm_sq += new_val * new_val; } - // Handle convergence failure - if (iter == max_iter) { - throw std::runtime_error("Katz centrality failed 
to converge in " + std::to_string(max_iter) + " iterations"); + const double err = std::sqrt(err_sq); + const double norm = std::sqrt(norm_sq); + + x.swap(x_next); + + if (norm > 0.0 && (err / norm) < tol) { + break; } + } - // Normalization - std::vector& x_final = *x_next; - if (normalized) { - double norm = 0.0; - for (double val : x_final) { - norm += val * val; - } - norm = std::sqrt(norm); - if (norm > 0) { - for (int i = 0; i < n; i++) { - x_final[i] /= norm; - } + if (normalize) { + double norm_sq2 = 0.0; + #pragma omp parallel for reduction(+ : norm_sq2) schedule(static) + for (int i = 0; i < n; ++i) { + norm_sq2 += x[i] * x[i]; + } + const double norm = std::sqrt(norm_sq2); + if (norm > 0.0) { + #pragma omp parallel for schedule(static) + for (int i = 0; i < n; ++i) { + x[i] /= norm; } } + } + + return x; +} + +py::object cpp_katz_centrality(py::object G, + py::object py_alpha, + py::object py_beta, + py::object py_max_iter, + py::object py_tol, + py::object py_normalized) { + Graph& graph = G.cast(); - // Prepare results - py::dict result; - for (int i = 0; i < n; i++) { - node_t internal_id = csr->nodes[i]; + const double alpha = py_alpha.cast(); + const int max_iter = py_max_iter.cast(); + const double tol = py_tol.cast(); + const bool normalized = py_normalized.cast(); + + std::shared_ptr csr_ptr = graph.gen_CSR(); + if (!csr_ptr || csr_ptr->nodes.empty()) { + return py::dict(); + } + + const int n = static_cast(csr_ptr->nodes.size()); + + // Build transpose CSR so that we accumulate from in-neighbors. + CSRMatrix A = build_transpose_matrix_from_csr(csr_ptr); + + // Process beta parameter: scalar or dict(node->beta). 
+ std::vector beta(n, 1.0); + if (py::isinstance(py_beta) || py::isinstance(py_beta)) { + const double beta_val = py_beta.cast(); + #pragma omp parallel for schedule(static) + for (int i = 0; i < n; ++i) { + beta[i] = beta_val; + } + } else if (py::isinstance(py_beta)) { + py::dict beta_dict = py_beta.cast(); + for (int i = 0; i < n; ++i) { + node_t internal_id = csr_ptr->nodes[i]; py::object node_obj = graph.id_to_node[py::cast(internal_id)]; - result[node_obj] = x_final[i]; + if (beta_dict.contains(node_obj)) { + beta[i] = beta_dict[node_obj].cast(); + } } + } else { + throw py::type_error("beta must be a float/int or a dict"); + } - return result; - } catch (const std::exception& e) { - throw std::runtime_error(e.what()); + std::vector scores = katz_centrality_omp(A, alpha, beta, max_iter, tol, normalized); + + // Prepare results + py::dict result; + for (int i = 0; i < n; ++i) { + node_t internal_id = csr_ptr->nodes[i]; + py::object node_obj = graph.id_to_node[py::cast(internal_id)]; + result[node_obj] = scores[i]; } -} \ No newline at end of file + + return result; +} diff --git a/cpp_easygraph/functions/pagerank/__init__.h b/cpp_easygraph/functions/pagerank/__init__.h index 3834f1ad..181674b7 100644 --- a/cpp_easygraph/functions/pagerank/__init__.h +++ b/cpp_easygraph/functions/pagerank/__init__.h @@ -2,4 +2,4 @@ #include "../../common/common.h" -py::object _pagerank(py::object G, double alpha, int max_iterator, double threshold); \ No newline at end of file +py::object _pagerank(py::object G, double alpha, int max_iterator, double threshold, py::object weight); \ No newline at end of file diff --git a/cpp_easygraph/functions/pagerank/pagerank.cpp b/cpp_easygraph/functions/pagerank/pagerank.cpp index 0c3d5479..dcc489a6 100644 --- a/cpp_easygraph/functions/pagerank/pagerank.cpp +++ b/cpp_easygraph/functions/pagerank/pagerank.cpp @@ -1,87 +1,119 @@ +#include +#include +#include +#include +#include +#ifdef _OPENMP +#include +#endif + #include "pagerank.h" 
#include "../../classes/directed_graph.h" +#include "../../classes/graph.h" #include "../../common/utils.h" #include "../../classes/linkgraph.h" -#include "time.h" - -struct Page { - Page(){} - Page(const double &_newPR, const double &_oldPR) {newPR = _newPR; oldPR = _oldPR;} - - double newPR, oldPR; -}; -// outDegree -// get_edge_from_node +namespace py = pybind11; -py::object _pagerank(py::object G, double alpha=0.85, int max_iterator=500, double threshold=1e-6) { +py::object _pagerank(py::object G, double alpha, int max_iterator, double threshold, py::object weight) { bool is_directed = G.attr("is_directed")().cast(); - if (is_directed == false) { - printf("PageRank is designed for directed graphs.\n"); - return py::dict(); - } - DiGraph& G_ = G.cast(); - int N = G_.node.size(); - // Graph_L G_l = graph_to_linkgraph(G_, is_directed, "", true); - Graph_L G_l; - if(G_.linkgraph_dirty){ - G_l = graph_to_linkgraph(G_, is_directed, "", true); - G_.linkgraph_structure=G_l; - G_.linkgraph_dirty = false; - } - else{ - G_l = G_.linkgraph_structure; + std::string weight_key = weight_to_string(weight); + bool has_weight_key = !weight.is_none() && !weight_key.empty(); + + Graph_L* G_l_ptr = nullptr; + int N = 0; + if (is_directed) { + DiGraph& G_ = G.cast(); + N = G_.node.size(); + if (G_.linkgraph_dirty) { + G_.linkgraph_structure = graph_to_linkgraph(G_, true, weight_key, true, false); + G_.linkgraph_dirty = false; + } + G_l_ptr = &G_.linkgraph_structure; + } else { + Graph& G_ = G.cast(); + N = G_.node.size(); + if (G_.linkgraph_dirty) { + G_.linkgraph_structure = graph_to_linkgraph(G_, false, weight_key, true, false); + G_.linkgraph_dirty = false; + } + G_l_ptr = &G_.linkgraph_structure; } - std::vector& E = G_l.edges; - std::vector outDegree = G_l.degree; - std::vector head = G_l.head; + const std::vector& E = G_l_ptr->edges; + const std::vector& outDegree = G_l_ptr->degree; + const std::vector& head = G_l_ptr->head; + + bool actually_weighted = false; + std::vector 
outWeightSum(N + 1, 0.0); - std::vectorpage(N+1); - for (int i = 1; i < N + 1; ++i) { - page[i] = Page(0, 1.0/N); + if (has_weight_key) { + #pragma omp parallel for reduction(|:actually_weighted) + for (int i = 1; i <= N; ++i) { + double sum_w = 0.0; + for (int p = head[i]; p != -1; p = E[p].next) { + sum_w += E[p].w; + if (!actually_weighted && std::abs(E[p].w - 1.0) > 1e-9) { + actually_weighted = true; + } + } + outWeightSum[i] = sum_w; + } } + bool use_weighted_logic = has_weight_key && actually_weighted; - int cnt = 0; //统计迭代几轮 - int shouldStop = 0; //根据oldPR与newPR的差值 判断是否停止迭代 + std::vector oldPR(N + 1, 1.0 / N); + std::vector newPR(N + 1, 0.0); + int cnt = 0; + while (cnt < max_iterator) { + double dangling_sum = 0.0; + + #pragma omp parallel for reduction(+:dangling_sum) + for (int i = 1; i <= N; ++i) { + bool is_dangling = use_weighted_logic ? (outWeightSum[i] < 1e-15) : (outDegree[i] == 0); + if (is_dangling) dangling_sum += oldPR[i]; + } - while(!shouldStop) - { - shouldStop = 1; - double res = 0; - for(int i = 1; i < N+1; ++i) { - if (outDegree[i] == 0) { - res += page[i].oldPR; - continue; + if (!use_weighted_logic) { + #pragma omp parallel for schedule(dynamic, 128) + for (int i = 1; i <= N; ++i) { + if (outDegree[i] == 0) continue; + double out_val = (oldPR[i] / outDegree[i]) * alpha; + for (int p = head[i]; p != -1; p = E[p].next) { + #pragma omp atomic + newPR[E[p].to] += out_val; + } } - double tmpPR = (page[i].oldPR / outDegree[i]) * alpha; - for(int p = head[i]; p != -1; p = E[p].next){ - page[E[p].to].newPR += tmpPR; + } else { + #pragma omp parallel for schedule(dynamic, 128) + for (int i = 1; i <= N; ++i) { + if (outWeightSum[i] < 1e-15) continue; + double out_val = (oldPR[i] / outWeightSum[i]) * alpha; + for (int p = head[i]; p != -1; p = E[p].next) { + #pragma omp atomic + newPR[E[p].to] += out_val * E[p].w; + } } } - double sum = 0; - for(int i = 1; i < N+1; ++i) - { - page[i].newPR += (1 - alpha) / N + res / N * alpha; - sum += 
fabs(page[i].newPR - page[i].oldPR); - - page[i].oldPR = page[i].newPR; - page[i].newPR = 0; + + double diff_sum = 0.0; + double jump_val = (1.0 - alpha) / N + (dangling_sum / N) * alpha; + + #pragma omp parallel for reduction(+:diff_sum) + for (int i = 1; i <= N; ++i) { + double final_pr = newPR[i] + jump_val; + diff_sum += std::fabs(final_pr - oldPR[i]); + oldPR[i] = final_pr; + newPR[i] = 0.0; } - - if (sum > threshold * N) - shouldStop = 0; + + if (diff_sum < threshold * N) break; cnt++; - if (cnt >= max_iterator) - break; - } - - py::list res_lst = py::list(); - for(int i = 1;i < N + 1;i++){ - res_lst.append(page[i].oldPR); } + py::list res_lst; + for (int i = 1; i <= N; ++i) res_lst.append(oldPR[i]); return res_lst; -} +} \ No newline at end of file diff --git a/cpp_easygraph/functions/pagerank/pagerank.h b/cpp_easygraph/functions/pagerank/pagerank.h index 3834f1ad..181674b7 100644 --- a/cpp_easygraph/functions/pagerank/pagerank.h +++ b/cpp_easygraph/functions/pagerank/pagerank.h @@ -2,4 +2,4 @@ #include "../../common/common.h" -py::object _pagerank(py::object G, double alpha, int max_iterator, double threshold); \ No newline at end of file +py::object _pagerank(py::object G, double alpha, int max_iterator, double threshold, py::object weight); \ No newline at end of file diff --git a/cpp_easygraph/functions/structural_holes/evaluation.cpp b/cpp_easygraph/functions/structural_holes/evaluation.cpp index 6627fc5b..8cd65f45 100644 --- a/cpp_easygraph/functions/structural_holes/evaluation.cpp +++ b/cpp_easygraph/functions/structural_holes/evaluation.cpp @@ -1,5 +1,15 @@ #include "evaluation.h" #include +#include +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif #ifdef EASYGRAPH_ENABLE_GPU #include #endif @@ -52,156 +62,175 @@ weight_t directed_mutual_weight(DiGraph& G, node_t u, node_t v, std::string weig weight_t normalized_mutual_weight(Graph& G, node_t u, node_t v, std::string weight, norm_t norm, rec_type& nmw_rec) { 
std::pair edge = std::make_pair(u, v); - weight_t nmw; - if (nmw_rec.count(edge)) { - nmw = nmw_rec[edge]; - } else { - weight_t scale = 0; - for (auto& w : G.adj[u]) { - weight_t temp_weight = mutual_weight(G, u, w.first, weight); - scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); - } - nmw = scale ? (mutual_weight(G, u, v, weight) / scale) : 0; - nmw_rec[edge] = nmw; + if (nmw_rec.count(edge)) return nmw_rec[edge]; + + weight_t scale = 0; + for (auto& w : G.adj[u]) { + weight_t temp_weight = mutual_weight(G, u, w.first, weight); + scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); } + weight_t nmw = scale ? (mutual_weight(G, u, v, weight) / scale) : 0; + nmw_rec[edge] = nmw; return nmw; } weight_t directed_normalized_mutual_weight(DiGraph& G, node_t u, node_t v, std::string weight, norm_t norm, rec_type& nmw_rec) { std::pair edge = std::make_pair(u, v); - weight_t nmw; - if (nmw_rec.count(edge)) { - nmw = nmw_rec[edge]; - } else { - weight_t scale = 0; - for (auto& w : G.adj[u]) { - weight_t temp_weight = directed_mutual_weight(G, u, w.first, weight); - scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); - } - for (auto& w : G.pred[u]) { - weight_t temp_weight = directed_mutual_weight(G, u, w.first, weight); - scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); - } - nmw = scale ? (directed_mutual_weight(G, u, v, weight) / scale) : 0; - nmw_rec[edge] = nmw; + if (nmw_rec.count(edge)) return nmw_rec[edge]; + + weight_t scale = 0; + for (auto& w : G.adj[u]) { + weight_t temp_weight = directed_mutual_weight(G, u, w.first, weight); + scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); + } + for (auto& w : G.pred[u]) { + weight_t temp_weight = directed_mutual_weight(G, u, w.first, weight); + scale = (norm == sum) ? (scale + temp_weight) : std::max(scale, temp_weight); } + weight_t nmw = scale ? 
(directed_mutual_weight(G, u, v, weight) / scale) : 0; + nmw_rec[edge] = nmw; return nmw; } -weight_t directed_local_constraint(DiGraph& G, node_t u, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { +weight_t local_constraint(Graph& G, node_t u, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { std::pair edge = std::make_pair(u, v); - if (local_constraint_rec.count(edge)) { - return local_constraint_rec[edge]; - } else { - weight_t direct = directed_normalized_mutual_weight(G, u, v, weight, sum, sum_nmw_rec); - weight_t indirect = 0; - std::unordered_set neighbors; - for (const auto& n : G.adj[v]) { - neighbors.insert(n.first); - } - for (const auto& n : G.pred[v]) { - neighbors.insert(n.first); - } - for (const auto& n : neighbors) { - if (n == v) { - continue; - } - indirect += directed_normalized_mutual_weight(G, u, n, weight, sum, sum_nmw_rec) * - directed_normalized_mutual_weight(G, n, v, weight, sum, sum_nmw_rec); - } - weight_t result = pow((direct + indirect), 2); - local_constraint_rec[edge] = result; - return result; - } + if (local_constraint_rec.count(edge)) return local_constraint_rec[edge]; + + weight_t direct = normalized_mutual_weight(G, u, v, weight, sum, sum_nmw_rec); + weight_t indirect = 0; + for (auto& w : G.adj[u]) { + if (w.first == v) continue; + indirect += normalized_mutual_weight(G, u, w.first, weight, sum, sum_nmw_rec) * + normalized_mutual_weight(G, w.first, v, weight, sum, sum_nmw_rec); + } + weight_t result = pow((direct + indirect), 2); + local_constraint_rec[edge] = result; + return result; } -weight_t local_constraint(Graph& G, node_t u, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { +weight_t directed_local_constraint(DiGraph& G, node_t u, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { std::pair edge = std::make_pair(u, v); - if (local_constraint_rec.count(edge)) { - return 
local_constraint_rec[edge]; - } else { - weight_t direct = normalized_mutual_weight(G, u, v, weight, sum, sum_nmw_rec); - weight_t indirect = 0; - for (auto& w : G.adj[u]) { - if (w.first == v) { - continue; - } - indirect += normalized_mutual_weight(G, u, w.first, weight, sum, sum_nmw_rec) * - normalized_mutual_weight(G, w.first, v, weight, sum, sum_nmw_rec); - } - weight_t result = pow((direct + indirect), 2); - local_constraint_rec[edge] = result; - return result; - } -} + if (local_constraint_rec.count(edge)) return local_constraint_rec[edge]; -std::pair compute_constraint_of_v(Graph& G, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { - weight_t constraint_of_v = 0; - if (G.adj[v].size() == 0) { - constraint_of_v = Py_NAN; - } else { - for (const auto& n : G.adj[v]) { - weight_t local_cons = local_constraint(G, v, n.first, weight, local_constraint_rec, sum_nmw_rec); - constraint_of_v += local_cons; - } - } - return std::make_pair(v, constraint_of_v); + weight_t direct = directed_normalized_mutual_weight(G, u, v, weight, sum, sum_nmw_rec); + weight_t indirect = 0; + std::unordered_set neighbors; + for (const auto& n : G.adj[v]) neighbors.insert(n.first); + for (const auto& n : G.pred[v]) neighbors.insert(n.first); + + for (const auto& n : neighbors) { + if (n == v) continue; + indirect += directed_normalized_mutual_weight(G, u, n, weight, sum, sum_nmw_rec) * + directed_normalized_mutual_weight(G, n, v, weight, sum, sum_nmw_rec); + } + weight_t result = pow((direct + indirect), 2); + local_constraint_rec[edge] = result; + return result; } -std::pair directed_compute_constraint_of_v(DiGraph& G, node_t v, std::string weight, rec_type& local_constraint_rec, rec_type& sum_nmw_rec) { - weight_t constraint_of_v = 0; - if (G.adj[v].size() == 0) { - constraint_of_v = Py_NAN; - } else { - std::unordered_set neighbors; - for (const auto& n : G.adj[v]) { - neighbors.insert(n.first); - } - for (const auto& n : G.pred[v]) { - 
neighbors.insert(n.first); - } - for (const auto& n : neighbors) { - weight_t local_cons = directed_local_constraint(G, v, n, weight, local_constraint_rec, sum_nmw_rec); - constraint_of_v += local_cons; +void preprocess_graph_for_constraint( + Graph& G, + std::string weight_key, + std::unordered_map>& weighted_adj, + std::unordered_map& strength +) { + for (auto& u_entry : G.adj) { + node_t u = u_entry.first; + for (auto& v_entry : u_entry.second) { + node_t v = v_entry.first; + double w = 1.0; + if (!weight_key.empty() && v_entry.second.count(weight_key)) { + w = v_entry.second[weight_key]; + } + weighted_adj[u][v] += w; + strength[u] += w; + weighted_adj[v][u] += w; + strength[v] += w; } } - return std::make_pair(v, constraint_of_v); } py::object invoke_cpp_constraint(py::object G, py::object nodes, py::object weight) { std::string weight_key = weight_to_string(weight); - rec_type sum_nmw_rec, local_constraint_rec; if (nodes.is_none()) { nodes = G.attr("nodes"); } - py::list nodes_list = py::list(nodes); int nodes_list_len = py::len(nodes_list); + + Graph& G_ref = G.cast(); + std::vector node_ids(nodes_list_len); + for (int i = 0; i < nodes_list_len; i++) { + node_ids[i] = G_ref.node_to_id[nodes_list[i]].cast(); + } + + std::unordered_map> weighted_adj; + std::unordered_map strength; + preprocess_graph_for_constraint(G_ref, weight_key, weighted_adj, strength); + std::vector constraint_results(nodes_list_len, 0.0); - if (G.attr("is_directed")().cast()) { - DiGraph& G_ = G.cast(); - for (int i = 0; i < nodes_list_len; i++) { - py::object v = nodes_list[i]; - node_t v_id = G_.node_to_id[v].cast(); - std::pair constraint_pair = - directed_compute_constraint_of_v(G_, v_id, weight_key, local_constraint_rec, sum_nmw_rec); - constraint_results[i] = constraint_pair.second; - } - } else { - Graph& G_ = G.cast(); + { + py::gil_scoped_release release; + #pragma omp parallel for schedule(dynamic) for (int i = 0; i < nodes_list_len; i++) { - py::object v = nodes_list[i]; - 
node_t v_id = G_.node_to_id[v].cast(); - std::pair constraint_pair = - compute_constraint_of_v(G_, v_id, weight_key, local_constraint_rec, sum_nmw_rec); - constraint_results[i] = constraint_pair.second; + node_t u = node_ids[i]; + + auto str_it = strength.find(u); + if (str_it == strength.end() || str_it->second == 0.0) { + constraint_results[i] = Py_NAN; + continue; + } + double u_strength = str_it->second; + + auto& neighbors_u = weighted_adj[u]; + if (neighbors_u.empty()) { + constraint_results[i] = Py_NAN; + continue; + } + + std::unordered_map contrib; + + for (auto& neighbor : neighbors_u) { + node_t j = neighbor.first; + double w_uj = neighbor.second; + double p_uj = w_uj / u_strength; + + contrib[j] += p_uj; + } + + for (auto& neighbor_j : neighbors_u) { + node_t j = neighbor_j.first; + double w_uj = neighbor_j.second; + double p_uj = w_uj / u_strength; + + auto q_it = weighted_adj.find(j); + if (q_it != weighted_adj.end()) { + double j_strength = strength[j]; + for (auto& neighbor_q : q_it->second) { + node_t q = neighbor_q.first; + if (q == u) continue; + + double w_jq = neighbor_q.second; + double p_jq = w_jq / j_strength; + + contrib[q] += p_uj * p_jq; + } + } + } + + double c_u = 0.0; + for (auto& neighbor : neighbors_u) { + node_t j = neighbor.first; + if (contrib.count(j)) { + c_u += pow(contrib[j], 2); + } + } + constraint_results[i] = c_u; } } - std::reverse(constraint_results.begin(), constraint_results.end()); - py::array::ShapeContainer ret_shape{nodes_list_len}; py::array_t ret(ret_shape, constraint_results.data()); return ret; @@ -221,7 +250,7 @@ static py::object invoke_gpu_constraint(py::object G, py::object nodes, py::obje std::vector& E = csr_graph->E; std::vector& row = coo_graph->row; std::vector& col = coo_graph->col; - std::vector *W_p = weight.is_none() ? &(coo_graph->unweighted_W) + std::vector *W_p = weight.is_none() ? 
&(coo_graph->unweighted_W) : coo_graph->W_map.find(weight_to_string(weight))->second.get(); std::unordered_map& node2idx = coo_graph->node2idx; int num_nodes = coo_graph->node2idx.size(); @@ -261,88 +290,259 @@ py::object constraint(py::object G, py::object nodes, py::object weight, py::obj #endif } -weight_t redundancy(Graph& G, node_t u, node_t v, std::string weight, rec_type& sum_nmw_rec, rec_type& max_nmw_rec) { - weight_t r = 0; - std::unordered_set neighbors; - for (const auto& n : G.adj[v]) { - neighbors.insert(n.first); +template +inline weight_t get_edge_weight(const MapType& attrs, const std::string& weight_key) { + if (weight_key.empty()) return 1.0; + auto it = attrs.find(weight_key); + return it != attrs.end() ? it->second : 1.0; +} + +inline weight_t compute_mutual_weight(const Graph& G, node_t u, node_t v, const std::string& weight_key) { + weight_t w = 0; + if (G.adj.count(u)) { + const auto& adj_u = G.adj.at(u); + auto it = adj_u.find(v); + if (it != adj_u.end()) w += get_edge_weight(it->second, weight_key); } - for (const auto& w : neighbors) { - r += normalized_mutual_weight(G, u, w, weight, sum, sum_nmw_rec) * normalized_mutual_weight(G, v, w, weight, max, max_nmw_rec); + if (G.adj.count(v)) { + const auto& adj_v = G.adj.at(v); + auto it = adj_v.find(u); + if (it != adj_v.end()) w += get_edge_weight(it->second, weight_key); } - return 1 - r; + return w; } -weight_t directed_redundancy(DiGraph& G, node_t u, node_t v, std::string weight, rec_type& sum_nmw_rec, rec_type& max_nmw_rec) { - weight_t r = 0; - std::unordered_set neighbors; - for (const auto& n : G.adj[v]) { - neighbors.insert(n.first); - } - for (const auto& n : G.pred[v]) { - neighbors.insert(n.first); +inline weight_t compute_directed_mutual_weight(const DiGraph& G, node_t u, node_t v, const std::string& weight_key) { + weight_t w = 0; + if (G.adj.count(u)) { + const auto& adj_u = G.adj.at(u); + auto it = adj_u.find(v); + if (it != adj_u.end()) w += get_edge_weight(it->second, 
weight_key); } - for (const auto& w : neighbors) { - r += directed_normalized_mutual_weight(G, u, w, weight, sum, sum_nmw_rec) * directed_normalized_mutual_weight(G, v, w, weight, max, max_nmw_rec); + if (G.adj.count(v)) { + const auto& adj_v = G.adj.at(v); + auto it = adj_v.find(u); + if (it != adj_v.end()) w += get_edge_weight(it->second, weight_key); } - return 1 - r; + return w; } -py::object invoke_cpp_effective_size(py::object G, py::object nodes, py::object weight) { - std::string weight_key = weight_to_string(weight); - rec_type sum_nmw_rec, max_nmw_rec; +std::vector compute_redundancy_core(py::object G_obj, const std::vector& target_nodes, const std::string& weight_key, bool is_directed) { + + // Cast to C++ objects once to avoid Python API overhead + const Graph* G_ptr = nullptr; + const DiGraph* DiG_ptr = nullptr; + if (is_directed) { + DiG_ptr = &G_obj.cast(); + } else { + G_ptr = &G_obj.cast(); + } - if (nodes.is_none()) { - nodes = G.attr("nodes"); + // Pre-compute max ID and node list + node_t max_graph_id = 0; + std::vector all_nodes_vec; + + if (is_directed) { + for (const auto& kv : DiG_ptr->adj) if (kv.first > max_graph_id) max_graph_id = kv.first; + for (const auto& kv : DiG_ptr->pred) if (kv.first > max_graph_id) max_graph_id = kv.first; + all_nodes_vec.reserve(DiG_ptr->adj.size() + DiG_ptr->pred.size()); + for(const auto& kv : DiG_ptr->adj) all_nodes_vec.push_back(kv.first); + for(const auto& kv : DiG_ptr->pred) all_nodes_vec.push_back(kv.first); + } else { + for (const auto& kv : G_ptr->adj) if (kv.first > max_graph_id) max_graph_id = kv.first; + all_nodes_vec.reserve(G_ptr->adj.size()); + for(const auto& kv : G_ptr->adj) all_nodes_vec.push_back(kv.first); } - py::list nodes_list = py::list(nodes); - int nodes_list_len = py::len(nodes_list); - std::vector effective_size_results(nodes_list_len, 0.0); + // Deduplicate nodes + std::sort(all_nodes_vec.begin(), all_nodes_vec.end()); + all_nodes_vec.erase(std::unique(all_nodes_vec.begin(), 
all_nodes_vec.end()), all_nodes_vec.end()); - if (!G.attr("is_directed")().cast()){ - Graph& G_ = G.cast(); - for (int i = 0; i < nodes_list_len; i++) { - weight_t redundancy_sum = 0; - py::object v = nodes_list[i]; - node_t v_id = G_.node_to_id[v].cast(); - if (py::len(G[v]) == 0) { - effective_size_results[i] = Py_NAN; + // Ensure vector size covers target nodes + if (!target_nodes.empty()) { + node_t max_target = *std::max_element(target_nodes.begin(), target_nodes.end()); + max_graph_id = std::max(max_graph_id, max_target); + } + + // Pre-compute Scale + std::vector scale_sum_vec(max_graph_id + 1, 0.0); + std::vector scale_max_vec(max_graph_id + 1, 0.0); + + #pragma omp parallel for schedule(dynamic) + for(int i = 0; i < all_nodes_vec.size(); ++i) { + node_t u = all_nodes_vec[i]; + double s_sum = 0; + double s_max = 0; + + if (is_directed) { + if (DiG_ptr->adj.count(u)) { + for(const auto& p : DiG_ptr->adj.at(u)) { + weight_t tw = compute_directed_mutual_weight(*DiG_ptr, u, p.first, weight_key); + s_sum += tw; s_max = std::max(s_max, (double)tw); + } + } + if (DiG_ptr->pred.count(u)) { + for(const auto& p : DiG_ptr->pred.at(u)) { + weight_t tw = compute_directed_mutual_weight(*DiG_ptr, u, p.first, weight_key); + s_sum += tw; s_max = std::max(s_max, (double)tw); + } + } + } else { + if (G_ptr->adj.count(u)) { + for(const auto& p : G_ptr->adj.at(u)) { + weight_t tw = compute_mutual_weight(*G_ptr, u, p.first, weight_key); + s_sum += tw; s_max = std::max(s_max, (double)tw); + } + } + } + if (u < scale_sum_vec.size()) { + scale_sum_vec[u] = s_sum; + scale_max_vec[u] = s_max; + } + } + + // Compute Redundancy + std::vector results(target_nodes.size()); + + if (!is_directed) { + // Undirected + #pragma omp parallel for schedule(dynamic) + for (int i = 0; i < target_nodes.size(); i++) { + node_t v_id = target_nodes[i]; + + if (G_ptr->adj.find(v_id) == G_ptr->adj.end() || G_ptr->adj.at(v_id).empty()) { + results[i] = NAN; continue; } - for (const auto& neighbor_info : 
G_.adj[v_id]) { + + const auto& v_neighbors = G_ptr->adj.at(v_id); + double redundancy_sum = 0; + double scale_v_sum = (v_id < scale_sum_vec.size()) ? scale_sum_vec[v_id] : 0; + + // Direct iteration avoids malloc locks + for (const auto& neighbor_info : v_neighbors) { node_t u_id = neighbor_info.first; - redundancy_sum += redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); + double scale_u_max = (u_id < scale_max_vec.size()) ? scale_max_vec[u_id] : 0; + double r_vu = 0; + + for (const auto& w_pair : v_neighbors) { + node_t w_id = w_pair.first; + if (u_id == w_id) continue; + + weight_t mw_uw = compute_mutual_weight(*G_ptr, u_id, w_id, weight_key); + if (mw_uw == 0) continue; + + weight_t mw_vw = compute_mutual_weight(*G_ptr, v_id, w_id, weight_key); + + double p_iq = (scale_v_sum > 0) ? (mw_vw / scale_v_sum) : 0; + double m_jq = (scale_u_max > 0) ? (mw_uw / scale_u_max) : 0; + + r_vu += p_iq * m_jq; + } + redundancy_sum += (1.0 - r_vu); } - effective_size_results[i] = redundancy_sum; + results[i] = redundancy_sum; } - } else{ - DiGraph& G_ = G.cast(); - for (int i = 0; i < nodes_list_len; i++) { - weight_t redundancy_sum = 0; - py::object v = nodes_list[i]; - node_t v_id = G_.node_to_id[v].cast(); - if (py::len(G[v]) == 0) { - effective_size_results[i] = Py_NAN; + } else { + //Directed + #pragma omp parallel for schedule(dynamic) + for (int i = 0; i < target_nodes.size(); i++) { + node_t v_id = target_nodes[i]; + + bool has_neighbors = (DiG_ptr->adj.count(v_id) && !DiG_ptr->adj.at(v_id).empty()) || + (DiG_ptr->pred.count(v_id) && !DiG_ptr->pred.at(v_id).empty()); + + if (!has_neighbors) { + results[i] = NAN; continue; } - for (const auto& neighbor_info : G_.adj[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += directed_redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); + + double redundancy_sum = 0; + double scale_v_sum = (v_id < scale_sum_vec.size()) ? 
scale_sum_vec[v_id] : 0; + + // Prepare common candidates + std::vector common_candidates; + if (DiG_ptr->adj.count(v_id)) { + for(auto& p : DiG_ptr->adj.at(v_id)) common_candidates.push_back(p.first); } - for (const auto& neighbor_info : G_.pred[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += directed_redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); + if (DiG_ptr->pred.count(v_id)) { + for(auto& p : DiG_ptr->pred.at(v_id)) common_candidates.push_back(p.first); } - effective_size_results[i] = redundancy_sum; + std::sort(common_candidates.begin(), common_candidates.end()); + common_candidates.erase(std::unique(common_candidates.begin(), common_candidates.end()), common_candidates.end()); + + // Loop A: Out-neighbors + if (DiG_ptr->adj.count(v_id)) { + for (const auto& neighbor_info : DiG_ptr->adj.at(v_id)) { + node_t u_id = neighbor_info.first; + double scale_u_max = (u_id < scale_max_vec.size()) ? scale_max_vec[u_id] : 0; + double r_vu = 0; + + for (const auto& w_id : common_candidates) { + if (u_id == w_id) continue; + weight_t mw_uw = compute_directed_mutual_weight(*DiG_ptr, u_id, w_id, weight_key); + if (mw_uw == 0) continue; + weight_t mw_vw = compute_directed_mutual_weight(*DiG_ptr, v_id, w_id, weight_key); + + double p_iq = (scale_v_sum > 0) ? (mw_vw / scale_v_sum) : 0; + double m_jq = (scale_u_max > 0) ? (mw_uw / scale_u_max) : 0; + r_vu += p_iq * m_jq; + } + redundancy_sum += (1.0 - r_vu); + } + } + + // Loop B: In-neighbors + if (DiG_ptr->pred.count(v_id)) { + for (const auto& neighbor_info : DiG_ptr->pred.at(v_id)) { + node_t u_id = neighbor_info.first; + double scale_u_max = (u_id < scale_max_vec.size()) ? 
scale_max_vec[u_id] : 0; + double r_vu = 0; + + for (const auto& w_id : common_candidates) { + if (u_id == w_id) continue; + weight_t mw_uw = compute_directed_mutual_weight(*DiG_ptr, u_id, w_id, weight_key); + if (mw_uw == 0) continue; + weight_t mw_vw = compute_directed_mutual_weight(*DiG_ptr, v_id, w_id, weight_key); + + double p_iq = (scale_v_sum > 0) ? (mw_vw / scale_v_sum) : 0; + double m_jq = (scale_u_max > 0) ? (mw_uw / scale_u_max) : 0; + r_vu += p_iq * m_jq; + } + redundancy_sum += (1.0 - r_vu); + } + } + results[i] = redundancy_sum; + } + } + + return results; +} + +py::object invoke_cpp_effective_size(py::object G, py::object nodes, py::object weight) { + std::string weight_key = weight.is_none() ? "" : weight.cast(); + bool is_directed = G.attr("is_directed")().cast(); + + if (nodes.is_none()) nodes = G.attr("nodes"); + py::list nodes_list = py::list(nodes); + size_t len = py::len(nodes_list); + std::vector target_ids(len); + + if (py::hasattr(G, "node_to_id")) { + py::object node_to_id = G.attr("node_to_id"); + for (size_t i = 0; i < len; i++) { + target_ids[i] = node_to_id[nodes_list[i]].cast(); + } + } else { + for (size_t i = 0; i < len; i++) { + target_ids[i] = nodes_list[i].cast(); } } - std::reverse(effective_size_results.begin(), effective_size_results.end()); + std::vector results = compute_redundancy_core(G, target_ids, weight_key, is_directed); - py::array::ShapeContainer ret_shape{nodes_list_len}; - py::array_t ret(ret_shape, effective_size_results.data()); - return ret; + py::array::ShapeContainer ret_shape{ (long)results.size() }; + return py::array_t(ret_shape, results.data()); } #ifdef EASYGRAPH_ENABLE_GPU @@ -435,7 +635,7 @@ static py::object invoke_gpu_efficiency(py::object G, py::object nodes, py::obje nodes_list = py::list(nodes); for (auto node : nodes_list) { int node_id = node2idx[G_.node_to_id[node].cast()]; - node_mask[node_id] = 1; + node_mask[node_id] = 1; } } else { nodes_list = py::list(G.attr("nodes")); @@ -487,80 +687,66 
@@ static py::object invoke_gpu_efficiency(py::object G, py::object nodes, py::obje py::object invoke_cpp_efficiency(py::object G, py::object nodes, py::object weight, py::object n_workers) { - rec_type sum_nmw_rec, max_nmw_rec; - py::dict effective_size_dict = py::dict(); - if (nodes.is_none()) { - nodes = G; - } - nodes = py::list(nodes); - if (!G.attr("is_directed")().cast()){ - Graph& G_ = G.cast(); - std::string weight_key = weight_to_string(weight); - int nodes_len = py::len(nodes); - for (int i = 0; i < nodes_len; i++) { - py::object v = nodes[py::cast(i)]; - if (py::len(G[v]) == 0) { - effective_size_dict[v] = py::cast(Py_NAN); - continue; - } - weight_t redundancy_sum = 0; - node_t v_id = G_.node_to_id[v].cast(); - for (const auto& neighbor_info : G_.adj[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); - } - effective_size_dict[v] = redundancy_sum; + std::string weight_key = weight.is_none() ? "" : weight.cast(); + bool is_directed = G.attr("is_directed")().cast(); + + // Parsing Nodes + if (nodes.is_none()) nodes = G.attr("nodes"); + py::list nodes_list = py::list(nodes); + size_t len = py::len(nodes_list); + std::vector target_ids(len); + + if (py::hasattr(G, "node_to_id")) { + py::object node_to_id = G.attr("node_to_id"); + for (size_t i = 0; i < len; i++) { + target_ids[i] = node_to_id[nodes_list[i]].cast(); } - } else{ - DiGraph& G_ = G.cast(); - std::string weight_key = weight_to_string(weight); - int nodes_len = py::len(nodes); - for (int i = 0; i < nodes_len; i++) { - py::object v = nodes[py::cast(i)]; - if (py::len(G[v]) == 0) { - effective_size_dict[v] = py::cast(Py_NAN); - continue; - } - weight_t redundancy_sum = 0; - node_t v_id = G_.node_to_id[v].cast(); - for (const auto& neighbor_info : G_.adj[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += directed_redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); - } - for (const auto& 
neighbor_info : G_.pred[v_id]) { - node_t u_id = neighbor_info.first; - redundancy_sum += directed_redundancy(G_, v_id, u_id, weight_key, sum_nmw_rec, max_nmw_rec); - } - effective_size_dict[v] = redundancy_sum; + } else { + for (size_t i = 0; i < len; i++) { + target_ids[i] = nodes_list[i].cast(); } } - py::dict degree; - if (weight.is_none()) { - degree = G.attr("degree")(py::none()).cast(); - } else { - degree = G.attr("degree")(weight).cast(); - } + // Compute Efficiency = Effective Size / Degree + std::vector eff_sizes = compute_redundancy_core(G, target_ids, weight_key, is_directed); - py::dict efficiency_dict; - for (auto item : effective_size_dict) { - int node = py::reinterpret_borrow(item.first).cast(); - double eff_size = py::reinterpret_borrow(item.second).cast(); + // Cast Graph pointers for fast degree access + const Graph* G_ptr = nullptr; + const DiGraph* DiG_ptr = nullptr; + if (is_directed) DiG_ptr = &G.cast(); + else G_ptr = &G.cast(); - if (!degree.contains(py::cast(node))) { + std::vector efficiency_results(len); + + #pragma omp parallel for schedule(static) + for (size_t i = 0; i < len; ++i) { + double es = eff_sizes[i]; + + // Propagate NAN from core + if (std::isnan(es)) { + efficiency_results[i] = NAN; continue; } - double node_degree = py::reinterpret_borrow(degree[py::cast(node)]).cast(); - if (node_degree == 0.0) { - efficiency_dict[py::cast(node)] = py::cast(Py_NAN); + node_t v = target_ids[i]; + double degree = 0; + + if (is_directed) { + if (DiG_ptr->adj.count(v)) degree += DiG_ptr->adj.at(v).size(); + if (DiG_ptr->pred.count(v)) degree += DiG_ptr->pred.at(v).size(); } else { - double efficiency_value = eff_size / node_degree; - efficiency_dict[py::cast(node)] = py::cast(efficiency_value); + if (G_ptr->adj.count(v)) degree += G_ptr->adj.at(v).size(); + } + + if (degree > 0) { + efficiency_results[i] = es / degree; + } else { + efficiency_results[i] = NAN; } } - return efficiency_dict; + py::array::ShapeContainer ret_shape{ (long)len 
}; + return py::array_t(ret_shape, efficiency_results.data()); } py::object efficiency(py::object G, py::object nodes, py::object weight, py::object n_workers) { @@ -627,7 +813,7 @@ py::object invoke_cpp_hierarchy(py::object G, py::object nodes, py::object weigh py::list nodes_list = py::list(nodes); int nodes_list_len = py::len(nodes_list); py::dict hierarchy = py::dict(); - + if(G.attr("is_directed")().cast()){ DiGraph& G_ = G.cast(); for (int i = 0; i < nodes_list_len; i++) { @@ -761,7 +947,7 @@ static py::object invoke_gpu_hierarchy(py::object G, py::object nodes, py::objec std::vector& E = csr_graph->E; std::vector& row = coo_graph->row; std::vector& col = coo_graph->col; - std::vector *W_p = weight.is_none() ? &(coo_graph->unweighted_W) + std::vector *W_p = weight.is_none() ? &(coo_graph->unweighted_W) : coo_graph->W_map.find(weight_to_string(weight))->second.get(); std::unordered_map& node2idx = coo_graph->node2idx; int num_nodes = coo_graph->node2idx.size(); @@ -777,7 +963,7 @@ static py::object invoke_gpu_hierarchy(py::object G, py::object nodes, py::objec } } else { nodes_list = py::list(G.attr("nodes")); - std::fill(node_mask.begin(), node_mask.end(), 1); + std::fill(node_mask.begin(), node_mask.end(), 1); } int gpu_r = gpu_easygraph::hierarchy(V, E, row, col, num_nodes, *W_p, is_directed, node_mask, hierarchy_results); diff --git a/easygraph/functions/centrality/__init__.py b/easygraph/functions/centrality/__init__.py index f37e55fe..bd2fec50 100644 --- a/easygraph/functions/centrality/__init__.py +++ b/easygraph/functions/centrality/__init__.py @@ -5,4 +5,5 @@ from .flowbetweenness import * from .laplacian import * from .pagerank import * -from .katz_centrality import * \ No newline at end of file +from .katz_centrality import * +from .eigenvector import * \ No newline at end of file diff --git a/easygraph/functions/centrality/degree.py b/easygraph/functions/centrality/degree.py index 6b97a792..9d3d0b29 100644 --- 
a/easygraph/functions/centrality/degree.py +++ b/easygraph/functions/centrality/degree.py @@ -5,6 +5,7 @@ @not_implemented_for("multigraph") +@hybrid("cpp_degree_centrality") def degree_centrality(G): """Compute the degree centrality for nodes in a bipartite network. @@ -36,6 +37,7 @@ def degree_centrality(G): @not_implemented_for("multigraph") @only_implemented_for_Directed_graph +@hybrid("cpp_in_degree_centrality") def in_degree_centrality(G): """Compute the in-degree centrality for nodes. @@ -80,6 +82,7 @@ def in_degree_centrality(G): @not_implemented_for("multigraph") @only_implemented_for_Directed_graph +@hybrid("cpp_out_degree_centrality") def out_degree_centrality(G): """Compute the out-degree centrality for nodes. diff --git a/easygraph/functions/centrality/eigenvector.py b/easygraph/functions/centrality/eigenvector.py new file mode 100644 index 00000000..f38085d1 --- /dev/null +++ b/easygraph/functions/centrality/eigenvector.py @@ -0,0 +1,154 @@ +import math +import easygraph as eg +from easygraph.utils import * +from easygraph.utils.decorators import * +from scipy import sparse +from scipy.sparse import linalg +import numpy as np +from collections import defaultdict + +__all__ = ["eigenvector_centrality"] + +@not_implemented_for("multigraph") +@hybrid("cpp_eigenvector_centrality") +def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None): + """Calculate eigenvector centrality for nodes in the graph + + Eigenvector centrality is based on the idea that a node's importance + depends on the importance of its neighboring nodes. + Specifically, a node's centrality is proportional to the sum of + centrality values of its neighbors. 
+
+    Parameters
+    ----------
+    G : graph object
+        An undirected or directed graph
+
+    max_iter : int, optional (default=100)
+        Maximum number of iterations for the power method
+
+    tol : float, optional (default=1.0e-6)
+        Convergence threshold; the algorithm terminates when the difference
+        between centrality values in consecutive iterations is less than this value
+
+    nstart : dictionary, optional (default=None)
+        Dictionary mapping nodes to initial centrality values.
+        If None, the ARPACK solver is used to directly compute the eigenvector
+
+    weight : string or None, optional (default=None)
+        Name of the edge attribute to be used as edge weight.
+        If None, all edges are considered to have weight 1
+
+    Returns
+    -------
+    centrality : dictionary
+        Dictionary mapping nodes to their eigenvector centrality values
+
+    Raises
+    ------
+    EasyGraphPointlessConcept
+        When the input graph is empty or contains only a single node
+
+    EasyGraphError
+        When the power iteration fails to converge within max_iter iterations
+
+    Notes
+    -----
+    This algorithm uses the power iteration method to find the principal eigenvector.
+    When nstart is not provided, the ARPACK solver is used for efficiency.
+    The returned centrality values are normalized. 
+ """ + + if len(G) == 0: + raise eg.EasyGraphPointlessConcept( + "cannot compute centrality for the null graph" + ) + + if len(G) == 1: + raise eg.EasyGraphPointlessConcept( + "cannot compute eigenvector centrality for a single node graph" + ) + + + # Build node list and mapping + nodelist = list(G.nodes) + n = len(nodelist) + node_map = {node: i for i, node in enumerate(nodelist)} + + # Build weighted adjacency matrix + row, col, data = [], [], [] + for u in nodelist: + u_idx = node_map[u] + for v, attrs in G[u].items(): + if v in node_map: + v_idx = node_map[v] + w = attrs.get(weight, 1.0) if weight else 1.0 + # Build transpose matrix for centrality calculation + row.append(v_idx) + col.append(u_idx) + data.append(float(w)) + + # Create CSR format sparse matrix + A = sparse.csr_matrix((data, (row, col)), shape=(n, n)) + + # Detect and handle isolated nodes + row_sums = np.array(A.sum(axis=1)).flatten() + col_sums = np.array(A.sum(axis=0)).flatten() + isolated_nodes = np.where((row_sums == 0) & (col_sums == 0))[0] + + has_isolated = len(isolated_nodes) > 0 + isolated_indices = [] + + # Add small self-loops to isolated nodes for stability + if has_isolated: + # Store isolated node indices + isolated_indices = isolated_nodes.tolist() + + # Add small self-loop weights to isolated nodes + for idx in isolated_indices: + A[idx, idx] = 1.0e-4 # Small enough to not affect results, but maintains numerical stability + if nstart is not None: + # Use custom initial vector for power iteration + v = np.array([nstart.get(n, 1.0) for n in nodelist], dtype=float) + v = v / np.sum(np.abs(v)) + + # Power iteration method to compute principal eigenvector + v_last = np.zeros_like(v) + for _ in range(max_iter): + np.copyto(v_last, v) + v = A @ v_last # Sparse matrix multiplication + + norm = np.linalg.norm(v) + if norm < 1e-10: + v = v_last.copy() + break + v = v / norm # Normalization + + # Check convergence + if np.linalg.norm(v - v_last) < tol: + break + else: + raise 
eg.EasyGraphError(f"Eigenvector calculation did not converge in {max_iter} iterations") + + centrality = v + else: + # Use ARPACK solver to directly compute the principal eigenvector + eigenvalues, eigenvectors = linalg.eigs(A, k=1, which='LR', + maxiter=max_iter, tol=tol) + centrality = np.real(eigenvectors[:,0]) + + # Ensure positive results and normalize + if centrality.sum() < 0: + centrality = -centrality + + centrality = centrality / np.linalg.norm(centrality) + # Set centrality of isolated nodes to zero + if has_isolated: + for idx in isolated_indices: + centrality[idx] = 0.0 + # Renormalize if needed + if np.sum(centrality) > 0: + centrality = centrality / np.linalg.norm(centrality) + + # Return dictionary of node centrality values + return {nodelist[i]: float(centrality[i]) for i in range(n)} \ No newline at end of file diff --git a/easygraph/functions/centrality/pagerank.py b/easygraph/functions/centrality/pagerank.py index e1bc767a..fa8112fa 100644 --- a/easygraph/functions/centrality/pagerank.py +++ b/easygraph/functions/centrality/pagerank.py @@ -8,7 +8,7 @@ @not_implemented_for("multigraph") @hybrid("cpp_pagerank") -def pagerank(G, alpha=0.85): +def pagerank(G, alpha=0.85, weight=None): """ Returns the PageRank value of each node in G. @@ -20,12 +20,15 @@ def pagerank(G, alpha=0.85): alpha : float The damping factor. Default is 0.85 + weight : None or string, optional (default=None) + If None, all edge weights are considered equal. + Otherwise holds the name of the edge attribute used as weight. 
""" import numpy as np if len(G) == 0: return {} - M = google_matrix(G, alpha=alpha) + M = google_matrix(G, alpha=alpha, weight=weight) # use numpy LAPACK solver eigenvalues, eigenvectors = np.linalg.eig(M.T) @@ -36,10 +39,10 @@ def pagerank(G, alpha=0.85): return dict(zip(G, map(float, largest / norm))) -def google_matrix(G, alpha): +def google_matrix(G, alpha, weight=None): import numpy as np - M = eg.to_numpy_array(G) + M = eg.to_numpy_array(G, weight=weight).astype(float) N = len(G) if N == 0: return M