In [1]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import scipy as sp

# Data reading
import pandas as pd
import csv
import pickle

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# networkx

import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities

from community import community_louvain

import scipy.sparse.linalg

In [2]:
# Read the protein-link network from a whitespace-separated weighted edge list.
# Lines starting with '#' are treated as comments and node labels stay strings.
# Alternative location of the data file:
#   "./../Data/4932_protein_links_v11_0.txt"
edge_list_path = "4932.protein.links.v11.0.txt"
G_CDC28 = nx.read_weighted_edgelist(edge_list_path, comments="#", nodetype=str)

# Quick sanity check on the size of the loaded graph
print('number of nodes of G:', G_CDC28.number_of_nodes())
print('number of edges of G:', G_CDC28.number_of_edges())

number of nodes of G: 6574
number of edges of G: 922983


In [3]:
# Protein of interest and the number of links it has in the loaded network
node_target = '4932.YBR160W'  # CDC28
target_degree = G_CDC28.degree(node_target)
print('The target node has %i links' % target_degree)

The target node has 1401 links


In [4]:
# Choose the network to be analyzed below.
# Edges with a combined score <= threshold_score (low confidence) are discarded.
threshold_score = 700
#threshold_score = 0

# Work on a copy: G_CDC28 keeps all of its edges, so this cell can be re-run
# (e.g. with a different threshold) without reloading the file.
G0 = G_CDC28.copy()

# Collect the low-confidence edges first and remove them afterwards, so the
# graph is never modified while its edge view is being iterated.
# The score is the 'weight' attribute set by nx.read_weighted_edgelist.
low_confidence_edges = [
    (u, v)
    for u, v, score in G0.edges(data='weight')
    if score <= threshold_score
]
G0.remove_edges_from(low_confidence_edges)

# Restrict to the largest connected component (G0 becomes a subgraph view)
largest_cc = max(nx.connected_components(G0), key=len)
G0 = G0.subgraph(largest_cc)

In [5]:
# Louvain community detection on the filtered network.
# partLouvain maps each node name to its community id; random_state is fixed
# so repeated runs use the same seed.
partLouvain = community_louvain.best_partition(G0, resolution=0.5, random_state=1)

# The count assumes community ids are consecutive integers starting at 0
number_of_communities = 1 + max(partLouvain.values())
print('# of partitions for Louvain modularity =', number_of_communities)

# of partitions for Louvain modularity = 29


In [6]:
# Look up which Louvain community contains the protein of interest
node_target = '4932.YBR160W'  # CDC28
target_community = partLouvain[node_target]
print('The target protein CDC28 belongs to community #', target_community)

The target protein CDC28 belongs to community # 3


In [7]:
# Define the communities as separate graphs.

# Node dict: community number -> list of the node names in that community
nodes = {i: [] for i in range(number_of_communities)}

# The loop variable is deliberately NOT called `community`: that name is bound
# to the networkx.algorithms.community module by the import cell, and reusing
# it here would silently replace the module with an int.
for name, community_id in partLouvain.items():
    nodes[community_id].append(name)

# Subgraph dict: community number -> subgraph of G0 induced by its members
G_cluster = {key: G0.subgraph(members) for key, members in nodes.items()}

In [8]:
# Rank the nodes of every community by three centrality measures.
#
# top10_final[i] is a list with one entry per measure (same order as
# measuresNames). Each entry is the list of [value, node] pairs for ALL nodes
# of community i, sorted in descending order (ties broken by node name);
# despite the name, only the printout below is limited to the first 10.
measuresNames = ["Degree", "Eigenvector", "Page Rank"]

top10_final = {}
for i in range(number_of_communities):
    top10_final[i] = []
    G = G_cluster[i]

    # Communities with at most two nodes use the iterative eigenvector
    # routine instead of the numpy-based one -- presumably because the
    # underlying eigen-solver cannot handle such tiny matrices (TODO confirm).
    if len(G.nodes) > 2:
        eigenvector = nx.eigenvector_centrality_numpy(G)
    else:
        eigenvector = nx.eigenvector_centrality(G)
    listMeasures = [dict(nx.degree(G)), eigenvector, nx.pagerank(G)]

    for idx, dictMeasure in enumerate(listMeasures):
        # Value first, so that sorting the pairs orders them by centrality
        top10 = sorted(
            ([value, node] for node, value in dictMeasure.items()),
            reverse=True,
        )
        top10_final[i].append(top10)

        print("\n Centrality Measure in Cluster:", str(i), measuresNames[idx])
        # `rank` is a separate name so the measure index `idx` is not shadowed
        for rank, pair in enumerate(top10[:10], start=1):
            print(str(rank), ": \t is node ", pair[1], ' with value: %.4f \t' % (pair[0]))


2.YJL034W  with value: 0.0162 	
2 : 	 is node  4932.YPL240C  with value: 0.0151 	
3 : 	 is node  4932.YMR186W  with value: 0.0121 	
4 : 	 is node  4932.YOR027W  with value: 0.0117 	
5 : 	 is node  4932.YPL106C  with value: 0.0105 	
6 : 	 is node  4932.YCL043C  with value: 0.0105 	
7 : 	 is node  4932.YNL064C  with value: 0.0100 	
8 : 	 is node  4932.YEL029C  with value: 0.0095 	
9 : 	 is node  4932.YKL073W  with value: 0.0094 	
10 : 	 is node  4932.YJR156C  with value: 0.0091 	

 Centrality Measure in Cluster: 17 Degree
1 : 	 is node  4932.YEL061C  with value: 69.0000 	
2 : 	 is node  4932.YCL029C  with value: 68.0000 	
3 : 	 is node  4932.YPL269W  with value: 67.0000 	
4 : 	 is node  4932.YLR045C  with value: 65.0000 	
5 : 	 is node  4932.YOR195W  with value: 63.0000 	
6 : 	 is node  4932.YDL028C  with value: 62.0000 	
7 : 	 is node  4932.YOL069W  with value: 60.0000 	
8 : 	 is node  4932.YJL030W  with value: 60.0000 	
9 : 	 is node  4932.YPL018W  with value: 57.0000 	
10 : 	 is node 

In [9]:
# Write data file: for every community, the (up to) 10 highest-ranked nodes of
# each centrality measure, one CSV row per (community, rank).
column_order = ['COMMUNITY', 'RANK',
                'DEGREE_NODE', 'DEGREE_VALUE',
                'EIGENVECTOR_NODE', 'EIGENVECTOR_VALUE',
                'PAGERANK_NODE', 'PAGERANK_VALUE']

# Only columns that are actually filled are declared, so every list ends up
# with the same length and the dict is a valid DataFrame source on its own.
central_nodes = {column: [] for column in column_order}

# Column prefixes in the order the measures are stored in top10_final[i]
measure_prefixes = ['DEGREE', 'EIGENVECTOR', 'PAGERANK']

for i in range(number_of_communities):
    G = G_cluster[i]
    # Number of rows for this community: 10, or fewer for small communities
    row_number = min(len(G.nodes), 10)
    central_nodes['COMMUNITY'].extend([i] * row_number)
    central_nodes['RANK'].extend(range(1, row_number + 1))

    for measure_index, prefix in enumerate(measure_prefixes):
        for value, node in top10_final[i][measure_index][:row_number]:
            # node[5:] drops the first five characters of the node name,
            # i.e. the '4932.' prefix of names such as '4932.YBR160W'
            central_nodes[prefix + '_NODE'].append(node[5:])
            central_nodes[prefix + '_VALUE'].append(value)

df = pd.DataFrame(central_nodes, columns=column_order)
df.to_csv("CDC28_Centrality_Louvain_thresh%s.csv" % threshold_score, index=False)
