In [1]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import scipy as sp

# Data reading
import pandas as pd
import csv
import pickle

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# networkx

import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities

from community import community_louvain

import scipy.sparse.linalg

In [2]:
# Load the protein-link network from a weighted edge list.
# Lines starting with "#" are treated as comments and node labels are kept as strings.
# (Alternative input location: "./../Data/4932_protein_links_v11_0.txt")
G_CDC28 = nx.read_weighted_edgelist(
    "4932.protein.links.v11.0.txt",
    comments="#",
    nodetype=str,
)

# Report the size of the freshly loaded graph
print('number of nodes of G:', G_CDC28.number_of_nodes())
print('number of edges of G:', G_CDC28.number_of_edges())

number of nodes of G: 6574
number of edges of G: 922983


In [3]:
# Node label of the protein of interest (CDC28) and its number of neighbours
# in the unfiltered graph.
node_target = '4932.YBR160W' #CDC28
print('The target node has %i links' % G_CDC28.degree[node_target])

The target node has 1401 links


In [4]:
# Choose the network to be analyzed below.
# NOTE: G0 is the same object as G_CDC28 (no copy is made), so the edge removal
# below also removes those edges from G_CDC28.
G0=G_CDC28

# Delete those edges with a combined score <= threshold_score (low confidence).
threshold_score = 700
# (alternative: threshold_score = 0)

# Collect the low-confidence edges first and remove them afterwards. Removing
# edges while looping over G0.edges mutates the adjacency structure that is
# being iterated, which is only safe by accident of the networkx version.
low_confidence_edges = [
    (u, v)
    for u, v, score in G0.edges(data='weight')
    if score <= threshold_score
]
G0.remove_edges_from(low_confidence_edges)

# Restrict to the largest connected component.
# G0.subgraph(...) returns a view onto the filtered graph, not an independent copy.
largest_cc = max(nx.connected_components(G0),key=len)
G0=G0.subgraph(largest_cc)

In [5]:
# Louvain community detection on the filtered graph.
# partLouvain maps each node label to the integer label of its community.
partLouvain = community_louvain.best_partition(
    G0,
    resolution=0.5,
    random_state=1,
)

# Community count is derived as (highest label + 1), i.e. labels are taken to start at 0.
number_of_communities = 1 + max(partLouvain.values())
print('# of partitions for Louvain modularity =',number_of_communities)

# of partitions for Louvain modularity = 29


The target protein CDC28 belongs to community # 3


In [7]:
# Define the communities as separate graphs.

# nodes: community number -> list of node labels assigned to that community.
# Every community number in range(number_of_communities) gets an entry, even if empty.
nodes = {community_id: [] for community_id in range(number_of_communities)}

# The loop variable is deliberately NOT called `community`: that name is bound to
# the module imported via `from networkx.algorithms import community`, and reusing
# it here would replace the module with an int for the rest of the notebook.
for node_name, community_id in partLouvain.items():
    nodes[community_id].append(node_name)

# G_cluster: community number -> subgraph of G0 induced by that community's nodes.
G_cluster = {
    community_id: G0.subgraph(members)
    for community_id, members in nodes.items()
}

In [22]:
# Report the community of the target protein, then print the size and the
# first (up to) ten node labels of every community subgraph.
node_target = '4932.YBR160W' # CDC28
print('The target protein CDC28 belongs to community #', partLouvain[node_target])
for key, val in G_cluster.items():
    print("".join(["\ncommunity: ", str(key), " of size ", str(val.number_of_nodes())]))
    # Iterating a graph yields its nodes; keep only a short preview
    out = list(val)[:10]
    print(str(out) + '...')

The target protein CDC28 belongs to community # 3

community: 0 of size 9
['4932.Q0017', '4932.Q0182', '4932.Q0092', '4932.Q0297', '4932.Q0032', '4932.Q0010', '4932.Q0143', '4932.YHR021W-A', '4932.Q0142']...

community: 1 of size 620
['4932.YBL056W', '4932.YNL084C', '4932.YMR217W', '4932.YHR107C', '4932.YDR063W', '4932.YBL007C', '4932.YKR075C', '4932.YPR085C', '4932.YKR055W', '4932.YKL109W']...

community: 2 of size 221
['4932.YMR194W', '4932.YJL189W', '4932.YDR172W', '4932.YER019C-A', '4932.YNL096C', '4932.YLR192C', '4932.YIL052C', '4932.YGR054W', '4932.YOR361C', '4932.YDR086C']...

community: 3 of size 300
['4932.YFR052W', '4932.YGR269W', '4932.YNL311C', '4932.YKL145W', '4932.YDR113C', '4932.YDR451C', '4932.YIL075C', '4932.YNL155W', '4932.YPR109W', '4932.YKR017C']...

community: 4 of size 438
['4932.YGL107C', '4932.YOR221C', '4932.YNL037C', '4932.YKL148C', '4932.YNL052W', '4932.YGR112W', '4932.YMR228W', '4932.YIR020C-B', '4932.YNR001C', '4932.YPL189C-A']...

community: 5 of size 177
