# Mtb Subnetwork Comparison

## Analysis of Healthy Network

In [7]:
import networkx as nx

In [2]:
#import graph
file = open("../Data/Macrophage_protein_network_with_attributes.pkl", "rb")
G = nx.read_gpickle(file)

In [3]:
#Goal =  Create and save graph object of graph subnetwork of just MTB neighbors's neighbors (second degree)

#Function for getting neighborhood nodes
def second_neighbors(graph, node) -> list:
    """Takes a graph and a node of choice and returns a list of the unique neighborhood nodes of the second degree for the given node,including the given node"""
    node = str(node)
    neighbor_list = []
    neighbor_list.append(node)
    for first_neighbor in graph.neighbors(node):
        neighbor_list.append(first_neighbor)
        for second_neighbor in graph.neighbors(first_neighbor):
            if second_neighbor != node:
                neighbor_list.append(second_neighbor)
    #Filter unqiue
    return_list = [] 
    for i in neighbor_list:
        if i not in return_list:
            return_list.append(i)
    
    return(neighbor_list)



In [4]:
# List of Mtb interactions
mtb_paper_edges = [
('Q7L591', 'Apa'),
('P50552', 'Apa'),
('Q9UI08', 'Apa'),
('Q15654', 'Apa'),
('Q16543', 'LpqR'),
('Q15287', 'Rv1827'),
('Q9C005', 'Rv1075C'),
('Q9C005', 'Rv1074c'),
('Q14847', 'Rv3033'),
('Q14005', 'Rv3033'),
('P22681', 'LpqN'),
('P20339', 'TB8.4'),
('P51610', 'Rv3668c'),
('P12956', 'EspR'),
('P62820', 'ESAT6'),
('Q15233', 'Apa'),
('Q07812', 'PE25'),
('O95466', 'LpqN'),
('Q9NXV2', 'LpqN'),
('Q15154', 'Rv1827'),
('Q01130', 'Rv1827'),
('O60488', 'PE25')]

mtb_node_names = [
'Rv3033',
'Apa',
'PE25',
'Rv1827',
'LpqN',
'Rv1075C',
'Rv1074c',
'LpqR',
'EspR',
'TB8.4',
'Rv3668c',
'ESAT6']

In [5]:
# #write list to file 
# import csv
# with open("../Data/mtb_interactions.csv","w") as file:
#     csv_out = csv.writer(file)
#     csv_out.writerows(mtb_paper_edges)

In [207]:
#Given list of mtb-human protein interactions, pull out the the list of human proteins:
mtb_human_prot_list = []
for pair in mtb_paper_edges: #need to change to given list
    if pair[0] in G.nodes():
        mtb_human_prot_list.append(pair[0])

if len(mtb_human_prot_list) != len(mtb_paper_edges): #just a test
    print("something is wrong, list lengths arent equal")


# aggregate node list of all mtb interactions and their neighbor

def mtb_healthy_neighborhood(lst,graph) -> list:
    """Function takes a list of known mtb interactions and a healthy network graph and compiles
    a list of all nodes of the first/second degree in the healthy network that mtb interacts with"""
    tmp_second_degree_community = []
    for item in lst:
        tmp = item 
        #tmp_neighborhood_list = second_neighbors(G, tmp) ##unblock this for second degree
        tmp_neighborhood_list = list(G.neighbors(tmp)) ##unblock this for first degree
        tmp_second_degree_community += tmp_neighborhood_list #merge neighborhood list with community list
        tmp_second_degree_community.append(tmp)
    #Remove duplicates in community list 
    second_degree_community = [] 
    for i in tmp_second_degree_community:
        if i not in second_degree_community:
            second_degree_community.append(i)
    return(second_degree_community)

def edge_puller(node_list, graph) -> list:
    """Function inputs a list of nodes and a graph and returns a list of all edges containing those nodes"""
    edge_list = []
    for edge in graph.edges():
        #if (edge[0] in node_list) or (edge[1] in node_list): ##unblock this for edges that need only 1 pair
        if (edge[0] in node_list) and (edge[1] in node_list): ##unblock this for restriction of edges
            edge_list.append(edge)
    return(edge_list)
            
#Call function
healthy_neighborhood_nodes = mtb_healthy_neighborhood(mtb_human_prot_list,G)

#Aggregate neighborhood edges from main graph:
healthy_neighborhood_edges = edge_puller(healthy_neighborhood_nodes,G)


#Create and save graph
Gx = nx.Graph(name = "Mtb Healthy Macrophage Sub-network")
Gx.add_edges_from(healthy_neighborhood_edges)


#Add attributes to new graph which got lost in the transfer process
for node in Gx.nodes():
    for attribute in list(G.nodes['P20333'].keys()): #gets list of all atrributes
        Gx.nodes[node][attribute] = G.nodes[node][attribute]
        


#Examine Gx
print(Gx)

#save graph
nx.write_gpickle(Gx, '../Data/Macrophage_healthy_Mtb_sub_protein_network_with_attributes.pkl')

Graph named 'Mtb Healthy Macrophage Sub-network' with 84 nodes and 97 edges


In [112]:
#Create color mapping dictionary where subcell location = key and color = value
#Reserved colors (not used) = lime, yellow, blue
color_dict = {
    "Apical cell membrane":'grey', 
    "Basolateral cell membrane":'darkgray',
    "Cell junction":'lightgrey',
    "Cell membrane":'gainsboro',
    "Cell projection":'rosybrown',
    "Cell surface":'lightcoral',
    "Chromosome":'brown',
    "Cytoplasm":'maroon',
    "Cytoplasmic granule":'red',
    "Cytoplasmic granule membrane":'mistyrose',
    "Cytoplasmic vesicle":'tomato',
    "Cytoplasmic vesicle membrane":'darksalmon',
    "Early endosome":'sienna',
    "Early endosome membrane":'chocolate',
    "Endomembrane system":'saddlebrown',
    "Endoplasmic reticulum":'sandybrown',
    "Endoplasmic reticulum lumen":'peachpuff',
    "Endoplasmic reticulum membrane":'linen',
    "Endoplasmic reticulum-Golgi intermediate compartment membrane":'bisque',
    "Endosome membrane":'darkorange',
    "Golgi apparatus":'burlywood',
    "Golgi apparatus membrane":'tan',
    "Golgi outpost":'papayawhip',
    "Host cell membrane":'orange',
    "Host cytoplasm":'wheat',
    "Host endoplasmic reticulum membrane":'moccasin',
    "Host membrane":'blanchedalmond',
    "Host nucleus":'darkgoldenrod',
    "Host nucleus inner membrane":'goldenrod',
    "Late endosome membrane":'cornsilk',
    "Lysosome":'gold',
    "Lysosome membrane":'lemonchiffon',
    "Membrane":'darkkhaki',
    "Microsome":'lightyellow',
    "Microsome membrane":'olivedrab',
    "Midbody":'beige',
    "Mitochondrion":'olivedrab',
    "Mitochondrion inner membrane":'yellowgreen',
    "Mitochondrion intermembrane space":'darkolivegreen',
    "Mitochondrion matrix":'darkseagreen',
    "Mitochondrion membrane":'palegreen',
    "Mitochondrion outer membrane":'forestgreen',
    "Myelin membrane":'darkgreen',
    "Nucleus":'deepskyblue',
    "Nucleus envelope":'steelblue',
    "Nucleus inner membrane":'lightblue',
    "Nucleus matrix":'cadetblue',
    "Nucleus membrane":'cyan',
    "Nucleus outer membrane":'darkcyan',
    "Nucleus speckle":'darkturquoise',
    "Perikaryon":'slategrey',
    "Peroxisome":'aquamarine',
    "Peroxisome membrane":'mediumaquamarine',
    "Photoreceptor inner segment":'cornflowerblue',
    "Recycling endosome":'navy',
    "Recycling endosome membrane":'mediumblue',
    "Rough endoplasmic reticulum":'blueviolet',
    "Rough endoplasmic reticulum membrane":'indigo',
    "Sarcoplasmic reticulum lumen":'plum',
    "Secreted":'fuchsia',
    "Unknown":'black',
    "Vacuole membrane":'slateblue',
    "Virion":'crimson',
    "Virion membrane":'palevioletred',
    "Virion tegument":'lightpink'
}

In [113]:
print(mtb_human_prot_list)

['Q7L591', 'P50552', 'Q9UI08', 'Q15654', 'Q16543', 'Q15287', 'Q9C005', 'Q9C005', 'Q14847', 'Q14005', 'P22681', 'P20339', 'P51610', 'P12956', 'P62820', 'Q15233', 'Q07812', 'O95466', 'Q9NXV2', 'Q15154', 'Q01130', 'O60488']


In [6]:
#Graph the subnetwork
import matplotlib.pyplot as plt 

#set fig dimensions
plt.figure(figsize =(15,15))

#setting x and y coordinates for nodes and edges
pos = nx.spring_layout(Gx, iterations=20, seed = 15)

#setting color map for attributes. Nodes that MTB interacts with are colored lime
color_map =[]
for node in Gx.nodes():
    location = Gx.nodes[node]['Subcellular_location_[CC]']
    if node in mtb_human_prot_list:
        color_map.append('lime')
    else:
        color_map.append(color_dict[location])
 
        

#Draw nodes w/ color map
nx.draw_networkx_nodes(Gx, 
                       pos, 
                       node_size=150,
                       alpha = 0.75,
                       nodelist = Gx.nodes(), 
                       node_color = color_map,
                       with_labels = True
                      )


#Draw Edges        
nx.draw_networkx_edges(Gx, pos)

#Plot Labels
name = "Subnetwork of macrophage proteins shown to interact with Mtb"
plt.title((name), fontdict = {'fontsize': 25, "color":'black'})
plt.legend(scatterpoints=1)

#####################Need to figure out how to add color legend 
# Scatter plot so I can have a legend
unique_subcellular_sub = list(set([part[0] for node,part in Gx.nodes(data='Subcellular_location_[CC]')]))
for v in unique_subcellular_sub:
    plt.scatter([],[], c=c, label='{}'.format(v))


plt.show()


NameError: name 'Gx' is not defined

<Figure size 1080x1080 with 0 Axes>

In [143]:
#Graph Features of subnetwork
#Eccentricity of a node is the maximum distance from that node to all other nodes in the graph

#Graph Length
print("General graph features = {}".format(print(Gx)))

#Graph Center (center is the set of nodes with eccentricity equal to radius)
try: 
    print("Graph center = {}".format(nx.center(Gx)))
except:
    print("Graph not connected, no graph center can not be calculated")


#Graph Diameter (diameter is maximum eccentricity)
try:
    print("Graph diameter = {}".format(nx.diameter(Gx)))
except:
    print("Graph not connected, Diameter can bot be calculated")


#Graph Radius (minimum eccentricity)
try:
    print("Graph Radius = {}".format(nx.radius(Gx)))
except:
    print("Graph not connected, Radius can bot be calculated")
      
#Average Degree
total_edges = len(Gx.edges())
total_nodes = len(Gx.nodes())
Avg_degree = total_edges/total_nodes
print("The Average Degree of our network is {}. The average degree of a network refers to the average number of edges that exist for each node.".format(round(Avg_degree,2)))

#Density of graph
print("The Density of our graph is {}. Density is a measure of how many ties between nodes exist compared to how many are possible.".format(round(nx.density(G),4)))

#Average clustering coefficient
print("The average clustering coefficient of our graph is {}. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible ".format(round(nx.average_clustering(Gx),4)))

#Is the graph fully connected?
print("Is the graph fully connected? {}".format(nx.is_connected(Gx)))

#Number of connected components:
print("Number of connected components = {}".format(nx.number_connected_components(Gx)))

#Number of Isolates (no neighbors)
isolates = len(list(nx.isolates(Gx)))
print("Number of isolates = {}".format(isolates))



Graph with 84 nodes and 97 edges
General graph features = None
Graph not connected, no graph center can not be calculated
Graph not connected, Diameter can bot be calculated
Graph not connected, Radius can bot be calculated
The Average Degree of our network is 1.15. The average degree of a network refers to the average number of edges that exist for each node.
The Density of our graph is 0.001. Density is a measure of how many ties between nodes exist compared to how many are possible.
The average clustering coefficient of our graph is 0.0573. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible 
Is the graph fully connected? False
Number of connected components = 7
Number of isolates = 0


In [150]:
#Centrality detection of Subnetwork

#Edge betweenness (better at finding bottlenecks)
betweenness_centralities = nx.betweenness_centrality(Gx) #dictionary of values
betweenness_sorted = dict(sorted(betweenness_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_betweenness = {k: betweenness_sorted[k] for k in list(betweenness_sorted)[:int((len(betweenness_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'betweenness' central nodes and their corresponding centrality values:")
for i in top_vals_betweenness:
    print(i, ':\t', top_vals_betweenness[i])


#Closeness
Closeness_centralities = nx.closeness_centrality(Gx)
Closeness_sorted = dict(sorted(Closeness_centralities.items(), reverse = True, key=lambda x:x[1]))
top_vals_closeness =  {k: Closeness_sorted[k] for k in list(Closeness_sorted)[:int((len(Closeness_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'closeness' central nodes and their corresponding centrality value:")
for i in top_vals_closeness:
    print(i, ':\t', top_vals_closeness[i])


#Degree centrality 
Degree_centralities = nx.degree_centrality(Gx)
Degree_sorted = dict(sorted(Degree_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_degree =  {k: Degree_sorted[k] for k in list(Degree_sorted)[:int((len(Degree_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'degree' central nodes and their corresponding centrality value:")
for i in top_vals_degree:
    print(i, ':\t', top_vals_degree[i])



#Graph the network, labeling and highlighting the top 5% central nodes for each method ######NEED TO DO THIS



List of top 5% most 'betweenness' central nodes and their corresponding centrality values:
Q7L591 :	 0.4941223663527514
O95466 :	 0.2515112715671048
P10412 :	 0.21006091730452597
P12956 :	 0.13535442583341498
List of top 5% most 'closeness' central nodes and their corresponding centrality value:
Q7L591 :	 0.3621849360632715
P10412 :	 0.31739862676512504
O95466 :	 0.28939286557996696
P40763 :	 0.2824695912837955
List of top 5% most 'degree' central nodes and their corresponding centrality value:
Q7L591 :	 0.3734939759036145
O95466 :	 0.08433734939759037
P22681 :	 0.07228915662650603
P12956 :	 0.07228915662650603


In [200]:
#Community Detection of Subnetwork

#Louvain method
import community
from networkx.algorithms.community import greedy_modularity_communities

healthy_sub_louvain = community.best_partition(Gx, random_state=0)  #This object returned is a dictionary containing the nodes of graph G as keys, and the community number that node belongs to as the value


#Changing structure of dictionary so that keys = community assignment, and values = list of nodes. This will make it easier for later use
sub_louvain_best_partition_working = {}
for key,value in healthy_sub_louvain.items():
    if value in sub_louvain_best_partition_working.keys():
        sub_louvain_best_partition_working[value].append(key) 
    else:
        sub_louvain_best_partition_working[value] = []    

#Sorting dictionary so keys are in ascending order:
sub_louvain_partition = {}
sorted_key_list = list(sub_louvain_best_partition_working.keys())
sorted_key_list.sort()

for i in sorted_key_list:
    sub_louvain_partition[i] = sub_louvain_best_partition_working[i]
        
#Determining how many communities constitute the 'best partition'. i.e how many communities did louvain find?
print("Communities detected by the Louvain algo = {}".format(len(list(sub_louvain_partition.keys()))))

#FastGreedy--------------------------------
sub_fastgreedy_partition_working = list(greedy_modularity_communities(Gx)) #The object returned is a list of sets of nodes, each for a different community
sub_fastgreedy_partition = {i:sub_fastgreedy_partition_working[i] for i in range(len(sub_fastgreedy_partition_working))} #create dict with same structure
print("Communities detected by the FastGreedy algo = {}".format(len(sub_fastgreedy_partition)))

#WalkTrap--------------------------------------
from cdlib import algorithms
from cdlib import readwrite
import csv
#sub_walktrap_partition = algorithms.walktrap(Gx) #The object returned is a NodeClusterint object   ##unblock this to rerun algo
#readwrite.write_community_csv(sub_walktrap_partition, "../Data/sub_healthy_walktrap.csv") #save object as csv file ##unblock this to resave object
sub_walktrap_partition = {} 
with open("../Data/sub_healthy_walktrap.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        sub_walktrap_partition[i] = lst[i]
print("Communities detected by the walktrap algo = {}".format(len(sub_walktrap_partition.keys())))


#Edge_betweeness--------------------------------
#sub_edgebetweenness_partition = algorithms.girvan_newman(Gx,level=1,) #The object returned is a list of sets of nodes, each for a different community  ##unblock this to rerun algo
#readwrite.write_community_csv(sub_edgebetweenness_partition, "../Data/sub_healthy_edgebetweenness.csv") #save object as csv file ##unblock this to resave object  ##unblock this to resave object
sub_edgebetweenness_partition = {} 
with open("../Data/sub_healthy_edgebetweenness.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        sub_edgebetweenness_partition[i] = lst[i]
print("Communities detected by the edgebetweenness algo = {}".format(len(sub_edgebetweenness_partition.keys())))



Communities detected by the Louvain algo = 12
Communities detected by the FastGreedy algo = 13
Communities detected by the walktrap algo = 16
Communities detected by the edgebetweenness algo = 8


## Idea - 4 panel graph, with communities detected by each method to compare? Need to figure out best way to graph community structure

In [199]:
print(sub_edgebetweenness_partition)

<cdlib.classes.node_clustering.NodeClustering object at 0x7f9d2ca74310>


In [185]:
sub_edgebetweenness_partition

TypeError: 'NodeClustering' object is not subscriptable

In [204]:
print(mtb_paper_edges)

[('Q7L591', 'Apa'), ('P50552', 'Apa'), ('Q9UI08', 'Apa'), ('Q15654', 'Apa'), ('Q16543', 'LpqR'), ('Q15287', 'Rv1827'), ('Q9C005', 'Rv1075C'), ('Q9C005', 'Rv1074c'), ('Q14847', 'Rv3033'), ('Q14005', 'Rv3033'), ('P22681', 'LpqN'), ('P20339', 'TB8.4'), ('P51610', 'Rv3668c'), ('P12956', 'EspR'), ('P62820', 'ESAT6'), ('Q15233', 'Apa'), ('Q07812', 'PE25'), ('O95466', 'LpqN'), ('Q9NXV2', 'LpqN'), ('Q15154', 'Rv1827'), ('Q01130', 'Rv1827'), ('O60488', 'PE25')]


# Analysis of Diseased Network

In [206]:
#Creating diseased subnetwork
print(Gx)

Gz = nx.Graph(name = "Mtb Diseased Macrophage Sub-network" )
Gz.add_edges_from(Gx.edges())
Gz.add_edges_from(mtb_paper_edges)
print(Gz)

Graph with 84 nodes and 97 edges
Graph named 'Mtb Diseased Macrophage Sub-network' with 96 nodes and 119 edges


In [222]:
#Graph Features of diseased subnetwork
#Graph Features of subnetwork
#Eccentricity of a node is the maximum distance from that node to all other nodes in the graph

#Graph Length
print("General graph features = {}".format(Gz))

#Graph Center (center is the set of nodes with eccentricity equal to radius)
try: 
    print("Graph center = {}".format(nx.center(Gz)))
except:
    print("Graph not connected, no graph center can not be calculated")


#Graph Diameter (diameter is maximum eccentricity)
try:
    print("Graph diameter = {}".format(nx.diameter(Gz)))
except:
    print("Graph not connected, Diameter can bot be calculated")


#Graph Radius (minimum eccentricity)
try:
    print("Graph Radius = {}".format(nx.radius(Gz)))
except:
    print("Graph not connected, Radius can bot be calculated")
      
#Average Degree
total_edges = len(Gz.edges())
total_nodes = len(Gz.nodes())
Avg_degree = total_edges/total_nodes
print("The Average Degree of our network is {}. The average degree of a network refers to the average number of edges that exist for each node.".format(round(Avg_degree,2)))

#Density of graph
print("The Density of our graph is {}. Density is a measure of how many ties between nodes exist compared to how many are possible.".format(round(nx.density(Gz),4)))

#Average clustering coefficient
print("The average clustering coefficient of our graph is {}. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible ".format(round(nx.average_clustering(Gz),4)))

#Is the graph fully connected?
print("Is the graph fully connected? {}".format(nx.is_connected(Gz)))

#Number of connected components:
print("Number of connected components = {}".format(nx.number_connected_components(Gz)))

#Number of Isolates (no neighbors)
isolates = len(list(nx.isolates(Gz)))
print("Number of isolates = {}".format(isolates))



General graph features = Graph named 'Mtb Diseased Macrophage Sub-network' with 96 nodes and 119 edges
Graph not connected, no graph center can not be calculated
Graph not connected, Diameter can bot be calculated
Graph not connected, Radius can bot be calculated
The Average Degree of our network is 1.24. The average degree of a network refers to the average number of edges that exist for each node.
The Density of our graph is 0.0261. Density is a measure of how many ties between nodes exist compared to how many are possible.
The average clustering coefficient of our graph is 0.0499. The clustering coefficient ranges from 0-1 and is a measure of the ratio of connections for each node observed versus what is possible 
Is the graph fully connected? False
Number of connected components = 4
Number of isolates = 0


In [209]:
#Graph Network


In [210]:
#Centrality Detection of diseased subnetwork

#Edge betweenness (better at finding bottlenecks)
betweenness_centralities = nx.betweenness_centrality(Gz) #dictionary of values
betweenness_sorted = dict(sorted(betweenness_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_betweenness = {k: betweenness_sorted[k] for k in list(betweenness_sorted)[:int((len(betweenness_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'betweenness' central nodes and their corresponding centrality values:")
for i in top_vals_betweenness:
    print(i, ':\t', top_vals_betweenness[i])


#Closeness
Closeness_centralities = nx.closeness_centrality(Gz)
Closeness_sorted = dict(sorted(Closeness_centralities.items(), reverse = True, key=lambda x:x[1]))
top_vals_closeness =  {k: Closeness_sorted[k] for k in list(Closeness_sorted)[:int((len(Closeness_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'closeness' central nodes and their corresponding centrality value:")
for i in top_vals_closeness:
    print(i, ':\t', top_vals_closeness[i])


#Degree centrality 
Degree_centralities = nx.degree_centrality(Gz)
Degree_sorted = dict(sorted(Degree_centralities.items(), reverse = True, key=lambda x:x[1])) #sort it so highest centrality are at top
top_vals_degree =  {k: Degree_sorted[k] for k in list(Degree_sorted)[:int((len(Degree_sorted)*0.05))]}  #Dict of top 5% of nodes
print("List of top 5% most 'degree' central nodes and their corresponding centrality value:")
for i in top_vals_degree:
    print(i, ':\t', top_vals_degree[i])



#Graph the network, labeling and highlighting the top 5% central nodes for each method ######NEED TO DO THIS

List of top 5% most 'betweenness' central nodes and their corresponding centrality values:
Q7L591 :	 0.4855827512042517
O95466 :	 0.2719860822268439
P10412 :	 0.2182183295117228
P12956 :	 0.12027782221511225
List of top 5% most 'closeness' central nodes and their corresponding centrality value:
Q7L591 :	 0.3503178235443682
P10412 :	 0.32229239766081874
O95466 :	 0.30089539200698845
P40763 :	 0.27572543526115667
List of top 5% most 'degree' central nodes and their corresponding centrality value:
Q7L591 :	 0.3368421052631579
O95466 :	 0.08421052631578947
P22681 :	 0.07368421052631578
P12956 :	 0.07368421052631578


In [212]:
#Community Detection of diseased subnetwork

#Louvain method
import community
from networkx.algorithms.community import greedy_modularity_communities

healthy_sub_louvain = community.best_partition(Gz, random_state=0)  #This object returned is a dictionary containing the nodes of graph G as keys, and the community number that node belongs to as the value


#Changing structure of dictionary so that keys = community assignment, and values = list of nodes. This will make it easier for later use
sub_louvain_best_partition_working = {}
for key,value in healthy_sub_louvain.items():
    if value in sub_louvain_best_partition_working.keys():
        sub_louvain_best_partition_working[value].append(key) 
    else:
        sub_louvain_best_partition_working[value] = []    

#Sorting dictionary so keys are in ascending order:
sub_louvain_partition = {}
sorted_key_list = list(sub_louvain_best_partition_working.keys())
sorted_key_list.sort()

for i in sorted_key_list:
    sub_louvain_partition[i] = sub_louvain_best_partition_working[i]
        
#Determining how many communities constitute the 'best partition'. i.e how many communities did louvain find?
print("Communities detected by the Louvain algo = {}".format(len(list(sub_louvain_partition.keys()))))

#FastGreedy--------------------------------
sub_fastgreedy_partition_working = list(greedy_modularity_communities(Gz)) #The object returned is a list of sets of nodes, each for a different community
sub_fastgreedy_partition = {i:sub_fastgreedy_partition_working[i] for i in range(len(sub_fastgreedy_partition_working))} #create dict with same structure
print("Communities detected by the FastGreedy algo = {}".format(len(sub_fastgreedy_partition)))

#WalkTrap--------------------------------------
from cdlib import algorithms
from cdlib import readwrite
import csv
#sub_walktrap_partition = algorithms.walktrap(Gz) #The object returned is a NodeClusterint object   ##unblock this to rerun algo
#readwrite.write_community_csv(sub_walktrap_partition, "../Data/sub_diseased_walktrap.csv") #save object as csv file ##unblock this to resave object
sub_walktrap_partition = {} 
with open("../Data/sub_diseased_walktrap.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        sub_walktrap_partition[i] = lst[i]
print("Communities detected by the walktrap algo = {}".format(len(sub_walktrap_partition.keys())))


#Edge_betweeness--------------------------------
#sub_edgebetweenness_partition = algorithms.girvan_newman(Gz,level=1,) #The object returned is a list of sets of nodes, each for a different community  ##unblock this to rerun algo
#readwrite.write_community_csv(sub_edgebetweenness_partition, "../Data/sub_diseased_edgebetweenness.csv") #save object as csv file ##unblock this to resave object  ##unblock this to resave object
sub_edgebetweenness_partition = {} 
with open("../Data/sub_diseased_edgebetweenness.csv") as file:  #read csv file and create community dict
    file_lines = csv.reader(file, delimiter=',')
    lst = []
    for line in file_lines:
        lst.append(line)
    for i in range(len(lst)):
        sub_edgebetweenness_partition[i] = lst[i]
print("Communities detected by the edgebetweenness algo = {}".format(len(sub_edgebetweenness_partition.keys())))



Communities detected by the Louvain algo = 9
Communities detected by the FastGreedy algo = 10
Communities detected by the walktrap algo = 16
Communities detected by the edgebetweenness algo = 5


In [23]:
###Considerations = how many communities are found compared to healthy? which nodes are considered central now? any changes?

SyntaxError: invalid syntax (3885921755.py, line 1)