In [1]:
import sys
import pandas as pd
import networkx as nx
import pickle
import multiprocessing
from tqdm import tqdm

In [19]:
def load_obj(file_path):
    """Unpickle and return the object stored at ``file_path``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the pickle file, opened in binary read mode.

    Returns
    -------
    object or None
        The unpickled object. If a ``FileNotFoundError`` is raised, an error
        message is printed and ``None`` is returned instead.

    Notes
    -----
    Only ``FileNotFoundError`` is handled here; any other error raised while
    opening or unpickling propagates to the caller. ``pickle.load`` can run
    arbitrary code, so it should only be used on trusted files.
    """
    loaded = None
    try:
        with open(file_path, 'rb') as handle:
            loaded = pickle.load(handle)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    return loaded
def _centrality_or_zeros(centrality_func, network):
    """Return ``centrality_func(network)``; on a networkx error return 0.0 for every node."""
    try:
        return centrality_func(network)
    except nx.exception.NetworkXException:
        return {node: 0.0 for node in network.nodes()}


def calc_network_centrality_RWR(args):
    """Compute node centralities plus a seed-driven RWR score and write them to CSV.

    Parameters
    ----------
    args : tuple
        ``(network, DIP_list, result_save_dir)`` packed into one argument so the
        function can be mapped over a ``multiprocessing.Pool``:

        * ``network`` -- networkx graph to analyse.
        * ``DIP_list`` -- node labels used as restart (seed) nodes of the random
          walk. Labels that are not nodes of ``network`` are ignored.
        * ``result_save_dir`` -- despite the name, the path of the output CSV file.

    Returns
    -------
    None
        The result is written to ``result_save_dir`` with one row per node
        (index column ``Node``) and columns ``Eigen``, ``Degree``, ``Between``
        and ``RWR``.

    Side effects
    ------------
    Every edge of ``network`` gets its ``'weight'`` attribute overwritten with
    ``edge betweenness centrality + 0.1``.

    Notes
    -----
    The RWR score is one personalized PageRank whose restart distribution is
    uniform over the seed genes. Previously the seed dict was built but never
    used, and PageRank was run once per node with the node itself as the only
    seed, which ignored ``DIP_list`` and cost one full PageRank per node.
    If no seed is present in the graph, every RWR score is 0.0, mirroring the
    fallback used for the other metrics.
    """
    network, DIP_list, result_save_dir = args

    node_list = list(network.nodes())

    eigenvector_centrality_dict = _centrality_or_zeros(nx.eigenvector_centrality, network)
    degree_centrality_dict = _centrality_or_zeros(nx.degree_centrality, network)
    between_centrality_dict = _centrality_or_zeros(nx.betweenness_centrality, network)

    # The centralities above are computed before the weights are assigned, so
    # they are unaffected by the edge weights set here.
    # Offset by 0.1 so that no edge ends up with a zero weight.
    edge_BC_no_Zero = {
        edge: score + 0.1
        for edge, score in nx.edge_betweenness_centrality(network).items()
    }
    nx.set_edge_attributes(network, edge_BC_no_Zero, 'weight')

    # Restart distribution: equal mass on every seed gene that is in the graph.
    nodes_in_network = set(node_list)
    start_genes_for_PR = {gene: 1 for gene in DIP_list if gene in nodes_in_network}

    if start_genes_for_PR:
        # nx.pagerank reads the 'weight' edge attribute by default.
        PR_score = nx.pagerank(network, personalization=start_genes_for_PR)
    else:
        # An all-zero personalization vector is invalid for pagerank.
        PR_score = {}

    # Build the table in one shot instead of growing it row by row.
    network_property_df = pd.DataFrame(
        {
            'Eigen': [eigenvector_centrality_dict.get(node, 0.0) for node in node_list],
            'Degree': [degree_centrality_dict.get(node, 0.0) for node in node_list],
            'Between': [between_centrality_dict.get(node, 0.0) for node in node_list],
            'RWR': [PR_score.get(node, 0.0) for node in node_list],
        },
        index=node_list,
        columns=['Eigen', 'Degree', 'Between', 'RWR'],
    )

    network_property_df.to_csv(result_save_dir, index_label='Node')

def mainp02(cores):
    """Run the centrality / RWR analysis for the M13 SIP network.

    Loads the pickled network, reads the DIP gene list, keeps only the genes
    that are nodes of the network and writes the per-node metrics to
    ``Result/Network_analysis/metadata_M13_centrality_RWR_result.csv``.

    Parameters
    ----------
    cores : int
        Accepted for interface compatibility; not used. The analysis is a
        single task, so it is run once in the current process. Previously four
        identical tasks were sent to a 4-process pool, which repeated the same
        computation four times and had the workers write the same CSV file
        concurrently.

    Returns
    -------
    None
        Returns early, without writing anything, when the network file is
        missing or when no DIP gene is found in the network.
    """
    print("Read SIP network")
    SIG_G = load_obj("Result/Network/metadata_M13_All_Structure_All_Shortest_Paths_graph.pkl")
    if SIG_G is None:
        # load_obj has already reported the missing file; without a graph the
        # membership test below would raise TypeError.
        return

    centrality_result_addr = "Result/Network_analysis/metadata_M13_centrality_RWR_result.csv"
    DIP_addr = "Data/DIP/metadata_M13.txt"

    DIP_df = pd.read_csv(DIP_addr, sep='\t')
    DIP_list = DIP_df['StringGene'].to_list()

    # Remove genes not present in SIG_G
    DIP_list = [node for node in DIP_list if node in SIG_G]

    if not DIP_list:
        print("No DIP genes found in the network.")
        return

    print("Start to analyze SIP network using multiple centrality methods")

    # One network, one seed list, one output file: a single call is all that is needed.
    calc_network_centrality_RWR((SIG_G, DIP_list, centrality_result_addr))
    print("Finish")

In [None]:
# Guarded so that importing this code (for example from a process started by
# multiprocessing with the "spawn" start method after exporting the notebook
# to a script) does not re-run the whole analysis. Inside a notebook kernel
# __name__ is "__main__", so the call still executes.
if __name__ == "__main__":
    mainp02(cores=4)