In [1]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import scipy as sp

# Data reading
import pandas as pd
import csv
import pickle

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# networkx

import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community import k_clique_communities

from community import community_louvain

import scipy.sparse.linalg

In [2]:
# Load the interaction network from a weighted edge list.
# Lines beginning with "#" are treated as comments; node labels stay strings.
G_CDC6 = nx.read_weighted_edgelist(
    "4932.protein.links.v11.0.txt",
    comments="#",
    nodetype=str,
)

# Report the size of the freshly loaded graph.
print('number of nodes of G:', G_CDC6.number_of_nodes())
print('number of edges of G:', G_CDC6.number_of_edges())

number of nodes of G: 6574
number of edges of G: 922983


In [3]:
# Choose the network to be analyzed below.
# NOTE: G0 is only another name for G_CDC6 (no copy is made), so the edge
# pruning further down also modifies G_CDC6 in place.
G0 = G_CDC6

print('number of nodes of the full G:', len(G0.nodes))
print('number of edges of the full G:', nx.number_of_edges(G0))
# Use the public top-level functions rather than the internal `nx.connected` path.
print('Is the full G connected?', nx.is_connected(G0))
print('How many connected subgraphs are there?', nx.number_connected_components(G0))

# Delete edges whose combined score is <= threshold_score (low confidence).
# The original author also experimented with threshold_score = 0.
threshold_score = 700

# Collect the low-confidence edges first and remove them afterwards, so the
# graph is never modified while its own edge view is being iterated.
# read_weighted_edgelist stores the numeric column under the 'weight' key.
low_confidence_edges = [
    (u, v)
    for u, v, score in G0.edges(data='weight')
    if score <= threshold_score
]
G0.remove_edges_from(low_confidence_edges)

# Restrict to the largest connected component.
# subgraph() returns a view onto the pruned graph, not an independent copy.
largest_cc = max(nx.connected_components(G0), key=len)
G0 = G0.subgraph(largest_cc)
print('number of nodes of restricted G:', len(G0.nodes))
print('number of edges of restricted G:', nx.number_of_edges(G0))

# Optional (disabled): randomize according to the degree-preserving
# Maslov-Sneppen algorithm.
# G0_randomized = nx.random_reference(G0,connectivity=True)


number of nodes of the full G: 6574
number of edges of the full G: 922983
Is the full G connected? False
How many connected subgraphs are there? 4
number of nodes of restricted G: 6099
number of edges of restricted G: 118661


In [4]:
# Node whose neighbours and cliques are examined in the cells that follow.
node_target = '4932.YJL194W' # CDC6

In [8]:
# Closed neighbourhood of the target: the target itself plus every node that
# shares an edge with it in G0. Asking the graph for the neighbours directly
# avoids scanning every edge, and using node_target (instead of repeating the
# literal id) keeps this cell consistent with the cell that defines it.
nodes = [node_target, *G0.neighbors(node_target)]
sub = G0.subgraph(nodes)
print(nx.number_of_nodes(sub))  # number of nodes in the neighbourhood subgraph

# Largest maximal clique of the neighbourhood that contains the target.
# nx.find_cliques enumerates the maximal cliques; filtering on membership is
# what the deprecated cliques_containing_node(sub, node) did for a single node.
l = max(
    (clique for clique in nx.find_cliques(sub) if node_target in clique),
    key=len,
)
print(len(l))
print(l)

175
48
['4932.YJL194W', '4932.YBR160W', '4932.YPR119W', '4932.YGR108W', '4932.YDL155W', '4932.YLR210W', '4932.YPR120C', '4932.YPL209C', '4932.YGL003C', '4932.YBR135W', '4932.YLL039C', '4932.YLR167W', '4932.YKR094C', '4932.YDL097C', '4932.YFR004W', '4932.YDR427W', '4932.YJL001W', '4932.YOR261C', '4932.YGL011C', '4932.YDL147W', '4932.YIL075C', '4932.YGL048C', '4932.YER094C', '4932.YDR328C', '4932.YHR200W', '4932.YER012W', '4932.YFR050C', '4932.YOR362C', '4932.YOR157C', '4932.YGR253C', '4932.YPR108W', '4932.YPR103W', '4932.YDR394W', '4932.YER021W', '4932.YOR117W', '4932.YKL145W', '4932.YMR314W', '4932.YBL041W', '4932.YOR259C', '4932.YHR027C', '4932.YFR052W', '4932.YGR135W', '4932.YOL038W', '4932.YIL007C', '4932.YDL132W', '4932.YML092C', '4932.YGR232W', '4932.YDL007W']


In [10]:
# Load a second graph object, G_CDC28, from the weighted edge list.
# The file name is the same one used when G_CDC6 was loaded.
# Lines beginning with "#" are treated as comments; node labels stay strings.
G_CDC28 = nx.read_weighted_edgelist(
    "4932.protein.links.v11.0.txt",
    comments="#",
    nodetype=str,
)

# Report the size of the freshly loaded graph.
print('number of nodes of G:', G_CDC28.number_of_nodes())
print('number of edges of G:', G_CDC28.number_of_edges())

number of nodes of G: 6574
number of edges of G: 922983


In [7]:
# Greedily extend the clique `l`: walk over every node of the graph and add it
# whenever it is adjacent to all nodes collected so far.
# NOTE(review): G0 is rebound to the G_CDC6 object in whatever state it is in
# when this cell runs; the thresholding cell removes edges from that same
# object, so the result depends on execution order -- confirm this is intended.
G0 = G_CDC6
nodes = l[:]  # work on a copy so `l` itself is left untouched
for node in G0.nodes:
    # all() stops at the first missing edge instead of checking every member.
    # Nodes already in `nodes` are only re-added if they carry a self-loop.
    if all(G0.has_edge(node, member) for member in nodes):
        nodes.append(node)
print(len(nodes))
print(nodes)
        

48
['4932.YJL194W', '4932.YBR160W', '4932.YPR119W', '4932.YGR108W', '4932.YDL155W', '4932.YLR210W', '4932.YPR120C', '4932.YPL209C', '4932.YGL003C', '4932.YBR135W', '4932.YLL039C', '4932.YLR167W', '4932.YKR094C', '4932.YDL097C', '4932.YFR004W', '4932.YDR427W', '4932.YJL001W', '4932.YOR261C', '4932.YGL011C', '4932.YDL147W', '4932.YIL075C', '4932.YGL048C', '4932.YER094C', '4932.YDR328C', '4932.YHR200W', '4932.YER012W', '4932.YFR050C', '4932.YOR362C', '4932.YOR157C', '4932.YGR253C', '4932.YPR108W', '4932.YPR103W', '4932.YDR394W', '4932.YER021W', '4932.YOR117W', '4932.YKL145W', '4932.YMR314W', '4932.YBL041W', '4932.YOR259C', '4932.YHR027C', '4932.YFR052W', '4932.YGR135W', '4932.YOL038W', '4932.YIL007C', '4932.YDL132W', '4932.YML092C', '4932.YGR232W', '4932.YDL007W']


In [19]:
# Subgraph induced by the extended node list.
sub = G0.subgraph(nodes)
print(nx.number_of_nodes(sub))

# Largest maximal clique of that subgraph containing the anchor node.
# nx.find_cliques enumerates the maximal cliques; filtering on membership is
# what the deprecated cliques_containing_node(sub, node) did for a single node.
anchor = '4932.YBR160W'
l1 = max(
    (clique for clique in nx.find_cliques(sub) if anchor in clique),
    key=len,
)
print(len(l1))
print(l1)

51
51
['4932.YPR119W', '4932.YDR328C', '4932.YIL046W', '4932.YDL147W', '4932.YOL038W', '4932.YDL097C', '4932.YFR050C', '4932.YLR167W', '4932.YER094C', '4932.YLR210W', '4932.YIL007C', '4932.YOR362C', '4932.YML092C', '4932.YBR135W', '4932.YOR117W', '4932.YGR135W', '4932.YLL039C', '4932.YPR108W', '4932.YDR394W', '4932.YER021W', '4932.YKR094C', '4932.YPR120C', '4932.YMR314W', '4932.YGR109C', '4932.YGR232W', '4932.YFR052W', '4932.YOR259C', '4932.YIL075C', '4932.YGL048C', '4932.YDL155W', '4932.YOR157C', '4932.YGR253C', '4932.YFR004W', '4932.YBR160W', '4932.YGL003C', '4932.YDL007W', '4932.YDL132W', '4932.YOR261C', '4932.YJL194W', '4932.YGR108W', '4932.YER012W', '4932.YGL011C', '4932.YPL209C', '4932.YHR027C', '4932.YJL001W', '4932.YHR200W', '4932.YPR103W', '4932.YMR036C', '4932.YBL041W', '4932.YKL145W', '4932.YDR427W']
