# Whole Network Analysis

## Analysis of the healthy Network

In [27]:
import networkx as nx
import matplotlib.pyplot as plt 
import community 
import time
from cdlib import algorithms
from networkx.algorithms.community import greedy_modularity_communities
import csv 

#import graph
file = open("../Data/Macrophage_protein_network_with_attributes.pkl", "rb")
GX = nx.read_gpickle(file)
GX = nx.Graph(GX, name = "Healthy Macrophage Entire Network")


In [None]:
#Graph features:
#Eccentricity of a node is the maximum distance from that node to all other nodes in the graph

#Graph Length
print("General graph features = {}".format(GX))

#Graph Center (center is the set of nodes with eccentricity equal to radius)
try: 
    print("Graph center = {}".format(nx.center(GX)))
except:
    print("Graph not connected, no graph center can not be calculated")


#Graph Diameter (diameter is maximum eccentricity)
try:
    print("Graph diameter = {}".format(nx.diameter(GX)))
except:
    print("Graph not connected, Diameter can bot be calculated")


#Graph Radius (minimum eccentricity)
try:
    print("Graph Radius = {}".format(nx.radius(GX)))
except:
    print("Graph not connected, Radius can bot be calculated")
      
#Average Degree
total_edges = len(GX.edges())
total_nodes = len(GX.nodes())
Avg_degree = total_edges/total_nodes
print("The Average Degree of our network is {}. The average degree of a network refers to the average number of edges that exist for each node.".format(round(Avg_degree,2)))

#Density of graph
print("The Density of our graph is {}. Density is a measure of how many ties between nodes exist compared to how many are possible.".format(round(nx.density(GX),4)))

#Average clustering coefficient
print("The average clustering coefficient of our graph is {}. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible ".format(round(nx.average_clustering(GZ),4)))

#Is the graph fully connected?
print("Is the graph fully connected? {}".format(nx.is_connected(GX)))

#Number of connected components:
print("Number of connected components = {}".format(nx.number_connected_components(GX)))

#Number of Isolates (no neighbors)
isolates = len(list(nx.isolates(GX)))
print("Number of isolates = {}".format(isolates))

In [28]:
#Centrality Detection of healthy Entire Network

#Edge betweenness (better at finding bottlenecks)
betweenness_centralities = nx.betweenness_centrality(GX) #dictionary of values
betweenness_sorted = dict(sorted(betweenness_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_betweenness = {k: betweenness_sorted[k] for k in list(betweenness_sorted)[:int((len(betweenness_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'betweenness' central nodes and their corresponding centrality values:")
for i in top_vals_betweenness:
    print(i, ':\t', top_vals_betweenness[i])


#Closeness
Closeness_centralities = nx.closeness_centrality(GX)
Closeness_sorted = dict(sorted(Closeness_centralities.items(), reverse = True, key=lambda x:x[1]))
top_vals_closeness =  {k: Closeness_sorted[k] for k in list(Closeness_sorted)[:int((len(Closeness_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'closeness' central nodes and their corresponding centrality value:")
for i in top_vals_closeness:
    print(i, ':\t', top_vals_closeness[i])


#Degree centrality 
Degree_centralities = nx.degree_centrality(GX)
Degree_sorted = dict(sorted(Degree_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_degree =  {k: Degree_sorted[k] for k in list(Degree_sorted)[:int((len(Degree_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'degree' central nodes and their corresponding centrality value:")
for i in top_vals_degree:
    print(i, ':\t', top_vals_degree[i])




List of top 1% most 'betweenness' central nodes and their corresponding centrality values:
P16401 :	 0.11924147875476535
Q9H257 :	 0.09458472924954216
P08631 :	 0.09003160526160497
Q02930 :	 0.0713029192200592
P02545 :	 0.07042220710747912
P59046 :	 0.06742776586323401
Q8NI38 :	 0.06013408713138839
Q13422 :	 0.0553462861236223
Q8NEC5 :	 0.05166233353887769
P30273 :	 0.04960936865274374
O95231 :	 0.04603880241374417
Q9P2A4 :	 0.04449308827811953
P43405 :	 0.0421177582311568
P01375 :	 0.04157392924626256
O43586 :	 0.03632540323065745
P04271 :	 0.03592267444858613
P19397 :	 0.033013654231402394
P26951 :	 0.03201049602934372
Q9HC29 :	 0.03144355040446455
Q8TB24 :	 0.03108593555651236
P41220 :	 0.031077243597077787
Q6XD76 :	 0.02871958676900104
Q9Y4H4 :	 0.028536713497742158
Q8N386 :	 0.02826731146924905
Q07325 :	 0.026658862483788796
P05107 :	 0.025899581272666884
Q96FZ5 :	 0.025152169836247674
P35408 :	 0.024966372664871745
P54852 :	 0.024864753142541208
Q9BXN2 :	 0.024236053943784284
Lis

In [29]:
#Louvain method
import community
from networkx.algorithms.community import greedy_modularity_communities

healthy_whole_louvain = community.best_partition(GX, random_state=0)  #This object returned is a dictionary containing the nodes of graph G as keys, and the community number that node belongs to as the value

print("Healthy-Whole network community detection:")
#Changing structure of dictionary so that keys = community assignment, and values = list of nodes. This will make it easier for later use
whole_louvain_best_partition_working = {}
for key,value in healthy_whole_louvain.items():
    if value in whole_louvain_best_partition_working.keys():
        whole_louvain_best_partition_working[value].append(key) 
    else:
        whole_louvain_best_partition_working[value] = []    

#Sorting dictionary so keys are in ascending order:
whole_louvain_partition = {}
sorted_key_list = list(whole_louvain_best_partition_working.keys())
sorted_key_list.sort()

for i in sorted_key_list:
    whole_louvain_partition[i] = whole_louvain_best_partition_working[i]
        
#Determining how many communities constitute the 'best partition'. i.e how many communities did louvain find?
print("Communities detected by the Louvain algo = {}".format(len(list(whole_louvain_partition.keys()))))

#FastGreedy--------------------------------
whole_fastgreedy_partition_working = list(greedy_modularity_communities(GX)) #The object returned is a list of sets of nodes, each for a different community
whole_fastgreedy_partition = {i:whole_fastgreedy_partition_working[i] for i in range(len(whole_fastgreedy_partition_working))} #create dict with same structure
print("Communities detected by the FastGreedy algo = {}".format(len(whole_fastgreedy_partition)))

#WalkTrap--------------------------------------
from cdlib import algorithms
from cdlib import readwrite
import csv
#whole_walktrap_partition = algorithms.walktrap(GX) #The object returned is a NodeClusterint object   ##unblock this to rerun algo
#readwrite.write_community_csv(whole_walktrap_partition, "../Data/whole_healthy_walktrap.csv") #save object as csv file ##unblock this to resave object
whole_walktrap_partition = {} 
with open("../Data/whole_healthy_walktrap.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        whole_walktrap_partition[i] = lst[i]
print("Communities detected by the walktrap algo = {}".format(len(whole_walktrap_partition.keys())))


#Edge_betweeness--------------------------------
#whole_edgebetweenness_partition = algorithms.girvan_newman(GX,level=1,) #The object returned is a list of sets of nodes, each for a different community  ##unblock this to rerun algo
#readwrite.write_community_csv(whole_edgebetweenness_partition, "../Data/whole_healthy_edgebetweenness.csv") #save object as csv file ##unblock this to resave object  ##unblock this to resave object
whole_edgebetweenness_partition = {} 
with open("../Data/whole_healthy_edgebetweenness.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        whole_edgebetweenness_partition[i] = lst[i]
print("Communities detected by the edgebetweenness algo = {}".format(len(whole_edgebetweenness_partition.keys())))


Healthy-Whole network community detection:
Communities detected by the Louvain algo = 47
Communities detected by the FastGreedy algo = 49
Communities detected by the walktrap algo = 150
Communities detected by the edgebetweenness algo = 23


# *Analysis of the Diseased Network*

In [15]:
#Extract diseased edge list 
mtb_interactions = []
with open("../Data/mtb_interactions.csv") as file:
    file_lines = csv.reader(file, delimiter=',')   
    for line in file_lines:
        mtb_interactions.append(line)

#Create diseased network
GZ = nx.Graph(GX, name = "Mtb Diseased Marophage Entire network")
GZ.add_edges_from(mtb_interactions)

In [20]:
#Graph features:
#Eccentricity of a node is the maximum distance from that node to all other nodes in the graph

#Graph Length
print("General graph features = {}".format(GZ))

#Graph Center (center is the set of nodes with eccentricity equal to radius)
try: 
    print("Graph center = {}".format(nx.center(GZ)))
except:
    print("Graph not connected, no graph center can not be calculated")


#Graph Diameter (diameter is maximum eccentricity)
try:
    print("Graph diameter = {}".format(nx.diameter(GZ)))
except:
    print("Graph not connected, Diameter can bot be calculated")


#Graph Radius (minimum eccentricity)
try:
    print("Graph Radius = {}".format(nx.radius(GZ)))
except:
    print("Graph not connected, Radius can bot be calculated")
      
#Average Degree
total_edges = len(GZ.edges())
total_nodes = len(GZ.nodes())
Avg_degree = total_edges/total_nodes
print("The Average Degree of our network is {}. The average degree of a network refers to the average number of edges that exist for each node.".format(round(Avg_degree,2)))

#Density of graph
print("The Density of our graph is {}. Density is a measure of how many ties between nodes exist compared to how many are possible.".format(round(nx.density(GZ),4)))

#Average clustering coefficient
print("The average clustering coefficient of our graph is {}. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible ".format(round(nx.average_clustering(GZ),4)))

#Is the graph fully connected?
print("Is the graph fully connected? {}".format(nx.is_connected(GZ)))

#Number of connected components:
print("Number of connected components = {}".format(nx.number_connected_components(GZ)))

#Number of Isolates (no neighbors)
isolates = len(list(nx.isolates(GZ)))
print("Number of isolates = {}".format(isolates))

General graph features = Graph named 'Mtb Diseased Marophage Entire network' with 3069 nodes and 4755 edges
Graph not connected, no graph center can not be calculated
Graph not connected, Diameter can bot be calculated
Graph not connected, Radius can bot be calculated
The Average Degree of our network is 1.55. The average degree of a network refers to the average number of edges that exist for each node.
The Density of our graph is 0.001. Density is a measure of how many ties between nodes exist compared to how many are possible.
The average clustering coefficient of our graph is 0.009. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible 
Is the graph fully connected? False
Number of connected components = 22
Number of isolates = 0


In [23]:
#Centrality Detection of Diseased Entire Network

#Edge betweenness (better at finding bottlenecks)
betweenness_centralities = nx.betweenness_centrality(GZ) #dictionary of values
betweenness_sorted = dict(sorted(betweenness_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_betweenness = {k: betweenness_sorted[k] for k in list(betweenness_sorted)[:int((len(betweenness_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'betweenness' central nodes and their corresponding centrality values:")
for i in top_vals_betweenness:
    print(i, ':\t', top_vals_betweenness[i])


#Closeness
Closeness_centralities = nx.closeness_centrality(GZ)
Closeness_sorted = dict(sorted(Closeness_centralities.items(), reverse = True, key=lambda x:x[1]))
top_vals_closeness =  {k: Closeness_sorted[k] for k in list(Closeness_sorted)[:int((len(Closeness_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'closeness' central nodes and their corresponding centrality value:")
for i in top_vals_closeness:
    print(i, ':\t', top_vals_closeness[i])


#Degree centrality 
Degree_centralities = nx.degree_centrality(GZ)
Degree_sorted = dict(sorted(Degree_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_degree =  {k: Degree_sorted[k] for k in list(Degree_sorted)[:int((len(Degree_sorted)*0.01))]}  #Dict of top 5% of nodes
print("List of top 1% most 'degree' central nodes and their corresponding centrality value:")
for i in top_vals_degree:
    print(i, ':\t', top_vals_degree[i])




List of top 5% most 'betweenness' central nodes and their corresponding centrality values:
P16401 :	 0.11884697438494135
Q9H257 :	 0.09450323277138711
P08631 :	 0.09007993540358102
Q02930 :	 0.0711127312981739
P02545 :	 0.07010756531181661
P59046 :	 0.06609019986130885
Q8NI38 :	 0.05988046769619658
Q13422 :	 0.05518072179513771
Q8NEC5 :	 0.05182278689484462
P30273 :	 0.0494658119173497
O95231 :	 0.04589812498914356
Q9P2A4 :	 0.04424172910307079
P43405 :	 0.04245718474387359
P01375 :	 0.041354641603132056
O43586 :	 0.036998825211365514
P04271 :	 0.035795279019257775
P19397 :	 0.0329104679344748
P26951 :	 0.03186307931253838
Q8TB24 :	 0.031584057148044804
Q9HC29 :	 0.03130970117411589
P41220 :	 0.031197455619096613
Q6XD76 :	 0.028564629907338556
Q9Y4H4 :	 0.02840488947303931
Q8N386 :	 0.02814361771804119
Q07325 :	 0.02652101355602888
P05107 :	 0.025779791663369758
Q96FZ5 :	 0.025103656715018782
P35408 :	 0.024872094375378507
P54852 :	 0.024714347442905807
Q9BXN2 :	 0.0240945279998602
Lis

In [25]:
#Louvain method
import community
from networkx.algorithms.community import greedy_modularity_communities

print("Diseased-Whole network community detection:")
diseased_whole_louvain = community.best_partition(GZ, random_state=0)  #This object returned is a dictionary containing the nodes of graph G as keys, and the community number that node belongs to as the value


#Changing structure of dictionary so that keys = community assignment, and values = list of nodes. This will make it easier for later use
whole_louvain_best_partition_working = {}
for key,value in diseased_whole_louvain.items():
    if value in whole_louvain_best_partition_working.keys():
        whole_louvain_best_partition_working[value].append(key) 
    else:
        whole_louvain_best_partition_working[value] = []    

#Sorting dictionary so keys are in ascending order:
whole_louvain_partition = {}
sorted_key_list = list(whole_louvain_best_partition_working.keys())
sorted_key_list.sort()

for i in sorted_key_list:
    whole_louvain_partition[i] = whole_louvain_best_partition_working[i]
        
#Determining how many communities constitute the 'best partition'. i.e how many communities did louvain find?
print("Communities detected by the Louvain algo = {}".format(len(list(whole_louvain_partition.keys()))))

#FastGreedy--------------------------------
whole_fastgreedy_partition_working = list(greedy_modularity_communities(GZ)) #The object returned is a list of sets of nodes, each for a different community
whole_fastgreedy_partition = {i:whole_fastgreedy_partition_working[i] for i in range(len(whole_fastgreedy_partition_working))} #create dict with same structure
print("Communities detected by the FastGreedy algo = {}".format(len(whole_fastgreedy_partition)))

#WalkTrap--------------------------------------
from cdlib import algorithms
from cdlib import readwrite
import csv
#whole_walktrap_partition = algorithms.walktrap(GZ) #The object returned is a NodeClusterint object   ##unblock this to rerun algo
#readwrite.write_community_csv(whole_walktrap_partition, "../Data/whole_diseased_walktrap.csv") #save object as csv file ##unblock this to resave object
whole_walktrap_partition = {} 
with open("../Data/whole_diseased_walktrap.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        whole_walktrap_partition[i] = lst[i]
print("Communities detected by the walktrap algo = {}".format(len(whole_walktrap_partition.keys())))


#Edge_betweeness--------------------------------
#whole_edgebetweenness_partition = algorithms.girvan_newman(GZ,level=1,) #The object returned is a list of sets of nodes, each for a different community  ##unblock this to rerun algo
#readwrite.write_community_csv(whole_edgebetweenness_partition, "../Data/whole_diseased_edgebetweenness.csv") #save object as csv file ##unblock this to resave object  ##unblock this to resave object
whole_edgebetweenness_partition = {} 
with open("../Data/whole_diseased_edgebetweenness.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        whole_edgebetweenness_partition[i] = lst[i]
print("Communities detected by the edgebetweenness algo = {}".format(len(whole_edgebetweenness_partition.keys())))


Communities detected by the Louvain algo = 47
Communities detected by the FastGreedy algo = 51
Communities detected by the walktrap algo = 155
Communities detected by the edgebetweenness algo = 23
