In [None]:
#Assignment:
#let’s merge this network: http://doi.org/10.18119/N91597
#    with this other one: http://www.ndexbio.org/#/network/1c69beff-1229-11e6-a039-06603eb7f303
#    or http://www.ndexbio.org/#/network/89975ebb-6294-11e7-a03e-0ac135e8bacf
#Merge them to make a new network in which nodes that have the same name are combined.
#If there is node 1 in network 1 that has the same name as node 2 in network 2, 
#    the new network should have just one node of that name that has all the attributes from node 1 AND node 2.
#As you do the merge, each node should be given an additional attribute “fromNetwork” 
#    so we can tell which network it originally came from. Values can be “network1”, “network2” or “both”
#Save to ndex
#Use Cytoscape to give it a graphic style in which the nodes are colored based on “fromNetwork”

#Steps:
#Step 1: for each node in TO, store the id as the value and the name as the key in the name-id map
#Step 2: for each node in FROM, check if its name is already in the name-id map. 
#        If it is, then store the TO id as the value and the from id as the key in the from-to-id-map. 
#        Otherwise, create the node in the TO network and 
#        then store the mapping between the node id in FROM to the id of the new node in TO.
#Step 3: copy the edges from FROM, 
#        creating new edges using the corresponding node ids in the from-to-node-id map and old_new_id_map.

#Special notice:
# Method "create_node" returns a new node id in To network, the index always begins from "0",
#   if the created id is duplicated with the existing id in the To network, the new node will replace the existing node.
#   Therefor, we should check the node id in both networks to decide which is the To network.
# Same as method 'create_edge' 

In [58]:
import ndex2.client as nc
import io
import json
from time import sleep

from ndex2.nice_cx_network import NiceCXNetwork
import ndex2

import networkx as nx
import numpy as np

#create a NDEx client object to access my accouont on the NDEx server
my_account="Jeffreyc"
my_password="Frenchfri3"

my_ndex=nc.Ndex2("http://public.ndexbio.org", my_account, my_password)
my_ndex.update_status()


In [59]:
#get NiceCX objects in order to add attribute

uuid_1 = '98ba6a19-586e-11e7-8f50-0ac135e8bacf'
uuid_2 = '1c69beff-1229-11e6-a039-06603eb7f303'

#Smaller networks for testing
#uuid_1 = '6bde432c-0b40-11e7-aba2-0ac135e8bacf'
#uuid_2 = 'c53894ce-8e47-11e5-b435-06603eb7f303'

NiceCX_network_1 = ndex2.create_nice_cx_from_server(server='http://public.ndexbio.org',  uuid = uuid_1)
NiceCX_network_2 = ndex2.create_nice_cx_from_server(server='http://public.ndexbio.org',  uuid = uuid_2)

print ("\n")
NiceCX_network_1.print_summary()
print ("\n")
NiceCX_network_2.print_summary()



Name: BioPlex 2.0 ( ~ 56,000 interactions )
Nodes: 10961
Edges: 56553
Node Attributes: 10961
Edge Attributes: 169659



Name: ChEMBL - High Affinity Compounds vs human targets (Commercially available)
Nodes: 3564
Edges: 11564
Node Attributes: 13569
Edge Attributes: 57820



In [60]:
#determine which network is "TO" and "FROM"

#nodes and edges in two networks may have duplicate id
#Duplicate large network to avoid the issue

#get the number of nodes in each network
number_of_nodes_network1 = len(NiceCX_network_1.get_nodes())
number_of_nodes_network2 = len(NiceCX_network_2.get_nodes())

#print(number_of_nodes_network1)
#print(number_of_nodes_network2)

#the smaller network is the From network
if(number_of_nodes_network1 >= number_of_nodes_network2):
    NiceCX_larger = NiceCX_network_1
    NiceCX_From = NiceCX_network_2
else:
    NiceCX_larger = NiceCX_network_2
    NiceCX_From = NiceCX_network_1
    
#copy the larger network to To network    
NiceCX_To = NiceCXNetwork() #Create an empty niceCx network
        
#copy the larger network to the new created network, the new network is To network
for node_id, larger_node in NiceCX_larger.get_nodes():
    #get each node name in larger network   
    name = list(larger_node.values())[1]  
            
    #'create_node' creates an empty node and returns a node id in the TO network 
    NiceCX_To.create_node(name,node_id)   
        
    #set the node attribute values    
    
    # CAUTION: some nodes in this network have no node attribute! 
    #          In that case, get_node_attributes(node_id) returns a "None" value
    node_attr_list = NiceCX_larger.get_node_attributes(node_id)     #return each node attrbute as an array
    
    if (node_attr_list != None):    
        for i in range(len(node_attr_list)): 
            node_attr_name = list(node_attr_list[i].values())[1]   
            node_attr_value = list(node_attr_list[i].values())[2]
        
            NiceCX_To.set_node_attribute(node_id, node_attr_name, node_attr_value ) 
            
#copy the edges to To network

#create 3 lists to store each edge's 's', 't', 'i' values. These 3 list will be used for checking duplicate edges
to_edge_s_list = []
to_edge_t_list = []
to_edge_i_list = []

for edge_id, larger_edge in NiceCX_larger.get_edges():        
    edge_s = larger_edge.get('s') #source node id in larger network
    edge_t = larger_edge.get('t') #target node id in larger network
    edge_i = larger_edge.get('i') #interaction in the larger network
    
    to_edge_s_list.append(edge_s)
    to_edge_t_list.append(edge_t)
    to_edge_i_list.append(edge_i)

    # 'create_edge' creates an empty edge and returns an edge id
    NiceCX_To.create_edge(edge_s, edge_t, edge_i)

    #set the edge attribute values      
    edge_attr_list = NiceCX_larger.get_edge_attributes(edge_id)
    if(edge_attr_list != None):
        for j in range(len(edge_attr_list)): 
            edge_attr_name = list(edge_attr_list[j].values())[1]   
            edge_attr_value = list(edge_attr_list[j].values())[2]
            NiceCX_To.set_edge_attribute(edge_id, edge_attr_name, edge_attr_value )         
            
#create a dict to store the edge in To network, source node is key and target node as value
to_edge_map = dict(zip(to_edge_s_list,to_edge_t_list))         


print("\n","The From network is:")
NiceCX_From.print_summary()
print("\n","The To network is:")
NiceCX_To.print_summary()


 The From network is:
Name: ChEMBL - High Affinity Compounds vs human targets (Commercially available)
Nodes: 3564
Edges: 11564
Node Attributes: 13569
Edge Attributes: 57820


 The To network is:
Name: Untitled
Nodes: 10961
Edges: 56553
Node Attributes: 10961
Edge Attributes: 169659



In [61]:
#Step 1: for each node in TO, store the id as the value and the name as the key in the name-id map

#create two arrays to store the node names and node ids in To network 
to_name_as_key = []
to_id_as_value = []

#following code is in "NiceCX v2.0 Tutorial", In[24], "CREATE A NAME to ID LOOKUP "

#NiceCX_To.get_nodes() returns two arguments: node_id (which is node id) and node (which is node object)
for node_id, node in NiceCX_To.get_nodes() :
    
    #creates a dict name_id_map, where to_node.get('n') is key and to_node.get('@id') is value
    name_id_map ={node.get('n'): node.get('@id') for node_id, node in NiceCX_To.get_nodes()}

to_name_as_key = list(name_id_map.keys()) # 
to_id_as_value = list(name_id_map.values()) # to_id_as_value[0] = 826, to_id_as_value[825] = 1


number_of_node_in_to = len(to_id_as_value)
print("number of nodes in To: ",number_of_node_in_to)


number of nodes in To:  10961


In [62]:
#add attribute "fromNetwork" to each node in NiceCX_To
for node_id, node in NiceCX_To.get_nodes() :
    NiceCX_To.add_node_attribute(node_id, name='fromNetwork', values='network1')

#add attribute "fromNetwork" to each node in NiceCX_From
#for node_id, node in NiceCX_From.get_nodes() :
#    NiceCX_From.add_node_attribute(node_id, name='fromNetwork', values='network2')
#    print(NiceCX_From.get_node_attributes(node_id))

In [63]:
#Step 2: for each node in FROM, check if its name is already in the name-id map. 
#If it is, then store the TO id as the value and the from id as the key in the from-to-id-map. 
#Otherwise, create the node in the TO network and 
#then store the mapping between the node id in FROM to the id of the new node in TO.

#create two lists for from-to-id-map, this map keeps the duplicated nodes
#fromid_list_as_key is the list has all the matched node id in From network
#toid_list_as_value is the list has all the matched node id in To network
fromid_list_as_key = []
toid_list_as_value =[]

#create two lists for old-new-id-map, this map keeps unmatched nodes
#oldid_list_as_key is the list that has all the unmatched node id in From network
#newid_list_as_value is the list that has all the new node id in To network, which is created/copied for unmatched node 
oldid_list_as_key =[]
newid_list_as_value = []

#for loop to check each node in From network, from_node retruns a node object
for from_node_id, from_node in NiceCX_From.get_nodes():
    a = 0
    this_node_name = from_node.get('n') #from_node.get('n') retruns node name of from_node_id in From network
    
    for i in range(len(to_name_as_key)):  # to_name_as_key is the list of node names in To network
        if (this_node_name == to_name_as_key[i]):  #if found the match node
            fromid_list_as_key.append(from_node_id)   #add this node id to duplicate From id list
            toid_list_as_value.append(name_id_map[this_node_name]) # find dict value(id in To network) by dict key (name in To) in name_id_map
            break
        else:
            a += 1
    
    if (a == len(to_name_as_key)): #if no match then create a new node in To network
        # "create_node" return a new node id in To network, id always begins from "0"
        new_id_in_To = NiceCX_To.create_node(this_node_name,node_represents=from_node_id) 
        
        oldid_list_as_key.append(from_node_id) #add this unmatched node id in From network to the old_id_list
        newid_list_as_value.append(new_id_in_To) #add the new created id in To network to the new_id_list

#create two dicts
from_to_id_map = dict(zip(fromid_list_as_key,toid_list_as_value))      
old_new_id_map = dict(zip(oldid_list_as_key,newid_list_as_value))
            
print ("found ", len(oldid_list_as_key), " unmatched nodes in From network. will be copied to To network")
                                               
print ("found ", len(fromid_list_as_key), "matched nodes in From network. will change attribute value to 'both' in To network")

found  3167  unmatched nodes in From network. will be copied to To network
found  397 matched nodes in From network. will change attribute value to 'both' in To network


In [64]:
#for the matched nodes in To network
#add all attributes in From to the same node in To
#combine the same attributes and change the value of the attribute 

#in from_to_id_map, go through each id 

print (len(fromid_list_as_key))

for x in range (len(fromid_list_as_key)):
    
    #for each pair of from_to_id_map, find value (node id in To network) by key (node id in From network
    from_node_id = fromid_list_as_key[x]    
    to_node_id = from_to_id_map[from_node_id]
    
    #in From network, find the node corresponding from_node_id, store this node's attributes (name:value) in dict from_attr_map
    from_node_attr_list = NiceCX_From.get_node_attributes(from_node_id) #return each node attrbutes in From as an array
    from_node_attr_name = []
    from_node_attr_value = []
    
    if (from_node_attr_list != None):    
        for y in range(len(from_node_attr_list)):         
            from_node_attr_name.append(list(from_node_attr_list[y].values())[1]) 
            from_node_attr_value.append(list(from_node_attr_list[y].values())[2])  
    from_attr_map = dict(zip(from_node_attr_name,from_node_attr_value)) #create a dict to store the attributes pair in From network
    
    #in To network, find the node corresponding to_node_id, store this node's attributes (name:value) in dict to_attr_map
    to_node_attr_list = NiceCX_To.get_node_attributes(to_node_id)  #return each node attrbutes in To as an array
    to_node_attr_name = []
    to_node_attr_value = []
    
    if (to_node_attr_list != None):    
        for z in range(len(to_node_attr_list)): 
            to_node_attr_name.append(list(to_node_attr_list[z].values())[1])
            to_node_attr_value.append(list(to_node_attr_list[z].values())[2])
    to_attr_map = dict(zip(to_node_attr_name,to_node_attr_value)) #create a dict to store the attributes pair in From network
    
    #go through the attribute list of the node in from_to_id_map
    for a in range (len(to_node_attr_name)):
        for b in range (len(from_node_attr_name)):       
                    
            #copy the rest attributes in From network node to To network node with the pair of from_to_id_map
            if (from_node_attr_name[b] != to_node_attr_name[a]):
                NiceCX_To.add_node_attribute(to_node_id, from_node_attr_name[b], from_attr_map[from_node_attr_name[b]] ) 
        
        
    #add attribure 'fromNetwork', value is 'both'
    NiceCX_To.remove_node_attribute(to_node_id,'fromNetwork' )
    NiceCX_To.add_node_attribute(to_node_id, name='fromNetwork', values='both')
    
    print(NiceCX_To.get_node_attributes(to_node_id), "\n")
    



397
[{'po': 5374, 'n': 'Secondary ID', 'v': 'uniprotKB:Q9UKV5'}, {'po': 5374, 'n': 'Target Name', 'v': 'P-glycoprotein 1'}, {'po': 5374, 'n': 'BindingDB Target Page', 'v': 'http://www.bindingdb.org/jsp/dbsearch/PrimarySearch_ki.jsp?energyterm=kJ/mole&tag=pol&polymerid=7531&target=P-glycoprotein+1&column=ki&startPg=0&Increment=50&submit=Search'}, {'po': 5374, 'n': 'type', 'v': 'protein'}, {'po': 5374, 'n': 'Target Name', 'v': 'P-glycoprotein 1'}, {'po': 5374, 'n': 'BindingDB Target Page', 'v': 'http://www.bindingdb.org/jsp/dbsearch/PrimarySearch_ki.jsp?energyterm=kJ/mole&tag=pol&polymerid=7531&target=P-glycoprotein+1&column=ki&startPg=0&Increment=50&submit=Search'}, {'po': 5374, 'n': 'type', 'v': 'protein'}, {'po': 5374, 'n': 'fromNetwork', 'v': 'both'}] 

[{'po': 4305, 'n': 'Secondary ID', 'v': 'uniprotKB:Q86WN2'}, {'po': 4305, 'n': 'Target Name', 'v': 'Multidrug resistance-associated protein 1'}, {'po': 4305, 'n': 'BindingDB Target Page', 'v': 'http://www.bindingdb.org/jsp/dbsearch/Pr

In [65]:
#for the unmatched nodes, COPY the node attribute in From to the created node in To

#using for loop to retrieve all the unmatched node in From network
for n in range(len(oldid_list_as_key)):
    node_attributes = NiceCX_From.get_node_attributes(oldid_list_as_key[n])    #return node attributes as array  
    
    new_id_in_To = old_new_id_map[oldid_list_as_key[n]] #find the corrsponding node ids in To network
        
    #set the attribute values in the To network, have to set attributes one by one to each node
    if (node_attributes != None):
        for x in range(len(node_attributes)): 
            #print(node_attributes[x])
            attr_name = list(node_attributes[x].values())[1]   
            attr_value = list(node_attributes[x].values())[2]  
                
            NiceCX_To.set_node_attribute(new_id_in_To, attr_name, attr_value ) 
        
    #add attribute "fromNetwork" to each node in NiceCX_From
    NiceCX_To.add_node_attribute(new_id_in_To, name='fromNetwork', values='network2')    

In [66]:
#check how many nodes in To network has "both" value
b = 0
n1 =0
n2=0
for id, node in NiceCX_To.get_nodes() :
    if (NiceCX_To.get_node_attribute_value(id, 'fromNetwork') == 'both'):
        b +=1        
    if (NiceCX_To.get_node_attribute_value(id, 'fromNetwork') == 'network1'):
        n1 += 1
    if (NiceCX_To.get_node_attribute_value(id, 'fromNetwork') == 'network2'):
        n2 += 1
print("There are ", b, " nodes in To network have 'both' attribute value")
print("There are ", n1, " nodes in To network have 'network1' attribute value")
print("There are ", n2, " nodes in To network have 'network2' attribute value")

There are  397  nodes in To network have 'both' attribute value
There are  10564  nodes in To network have 'network1' attribute value
There are  3167  nodes in To network have 'network2' attribute value


In [67]:
#Step 3: copy the edges from FROM, 
#creating new edges using the corresponding node ids in the from-to-node-id map and old_new_id_map.

#create 3 list to store each edge values in From network
from_edge_s_list = []
from_edge_t_list = []
#from_edge_i_list = []

#get each edge data in From network
for edge_id, from_edge in NiceCX_From.get_edges():
    from_edge_id = edge_id #edge id in From network
    from_edge_s = from_edge.get('s') #source node id in From network
    from_edge_t = from_edge.get('t') #target node id in From network
    from_edge_i = from_edge.get('i') #interaction in the From network

   #check if this edge is already in To network  
    for n in range (len(to_edge_s_list)):
        if (from_edge_s == to_edge_s_list[n] and from_edge_t == to_edge_map[to_edge_s_list[n]]):
            break

    # 'create_edge' creates an empty edge and returns an edge id
    NiceCX_To.create_edge(from_edge_s, from_edge_t, from_edge_i)    
    
    #set the edge attribute values      
    edge_attr_list = NiceCX_To.get_edge_attributes(from_edge_id)
    if(edge_attr_list != None):
        for x in range(len(edge_attr_list)): 
            edge_attr_name = list(edge_attr_list[x].values())[1] 
            edge_attr_value = list(edge_attr_list[x].values())[2]
            NiceCX_To.set_edge_attribute(edge_id, edge_attr_name, edge_attr_value )  
    
print("total nodes in the TO network ", len(NiceCX_To.get_nodes()))
print("total edges in the TO network ", len(NiceCX_To.get_edges()))

total nodes in the TO network  14128
total edges in the TO network  68117


In [55]:
#upload the combined network to my account
uri_upload_message = NiceCX_To.upload_to("http://public.ndexbio.org", my_account, my_password)
print(uri_upload_message)

Generating CX
http://public.ndexbio.org/v2/network/54c0250f-988d-11e9-bcaf-0ac135e8bacf


In [68]:
#update the network profile
uuid = uri_upload_message.rpartition('/')[-1]

new_profile={"name":"combined 2 networks", "description":"Merge 2 networks to make a new network in which nodes that have the same name are combined.", "version":"4.0"}
my_ndex.update_network_profile(uuid, new_profile)    
my_ndex.make_network_public(uuid)


''