In [24]:
import pandas as pd
import networkx as nx
import ndex2.client as nc
import ndex2

### Map orthologous genes 

Data downloaded from HomoloGene Database: ftp://ftp.ncbi.nih.gov/pub/HomoloGene/build68/homologene.data

In [2]:
filtered_rat_human = pd.read_table('/cellar/users/t1jia/Data/Ortho_rat/Data/homologene_filtered_rat_human.txt', header=None)

In [3]:
header = ['hid', 'taxid','geneid','gene_symbol','protein_id','protein_accession']
filtered_rat_human.columns = header
filtered_rat_human = filtered_rat_human.set_index('hid')

In [4]:
filtered_rat_human.head()

Unnamed: 0_level_0,taxid,geneid,gene_symbol,protein_id,protein_accession
hid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,9606,34,ACADM,4557231,NP_000007.1
3,10116,24158,Acadm,292494885,NP_058682.2
5,9606,37,ACADVL,4557235,NP_000009.1
5,10116,25363,Acadvl,6978435,NP_037023.1
6,9606,38,ACAT1,4557237,NP_000010.1


In [5]:
groups = {}

In [6]:
for hid, row in filtered_rat_human.iterrows():
    if hid not in groups:
        groups[hid] = {}
    if row['taxid'] == 9606:
        groups[hid]['human'] = row['gene_symbol']
    elif row['taxid'] == 10116:
        groups[hid]['rat'] = row['gene_symbol']

In [7]:
# Take all one-to-one mapping
hid_filtered = [hid for hid in groups if len(groups[hid]) == 2]

In [8]:
name_map = {}
for hid in hid_filtered:
    name_map[groups[hid]['human']] = groups[hid]['rat']

In [9]:
human_gene_list = name_map.keys()

In [10]:
len(human_gene_list)

16289

### PCNet and mapping statistics 

In [11]:
PCNet = nx.read_edgelist('/cellar/users/t1jia/Data/Networks/PCNet.txt')

In [12]:
# Number of nodes in PCNet
len(PCNet.nodes())

19781

In [13]:
# Number of ortholog nodes
overlap_nodes = set(PCNet.nodes()).intersection(set(human_gene_list))
len(overlap_nodes)

15925

In [14]:
# Fraction of nodes
len(overlap_nodes)/float(len(PCNet.nodes()))

0.8050654668621404

In [15]:
PCNet_orthologs = PCNet.subgraph(overlap_nodes)

In [16]:
# Number of edges in PCNet
len(PCNet.edges())

2724724

In [17]:
# Number of edges in filtered PCNet
len(PCNet_orthologs.edges())

2215901

In [18]:
# Fraction of edges
float(len(PCNet_orthologs.edges()))/len(PCNet.edges())

0.8132570491543364

### Bring PCnet to rat naming space

In [19]:
rat_PCNet = nx.relabel_nodes(PCNet_orthologs, name_map)

In [20]:
nx.write_edgelist(rat_PCNet,'/cellar/users/t1jia/Data/Ortho_rat/Data/rat_PCNet.txt',data=False)

### Upload rat PCNet to Ndex

In [27]:
my_account="my_account"
my_password="my_password"
my_server="http://public.ndexbio.org"
try:
    my_ndex=nc.Ndex2(my_server, my_account, my_password)
    my_ndex.update_status()
    print("Success.  Please continue.")
except Exception as inst:
    print("Could not access account %s with password %s" % (my_account, my_password))
    print(inst.args)

Success.  Please continue.


In [29]:
NiceCx_ratpcnet = ndex2.create_nice_cx_from_networkx(rat_PCNet)
NiceCx_ratpcnet.set_name('Rat PCNet')

In [30]:
%%time
upload_message = NiceCx_ratpcnet.upload_to(my_server, my_account, my_password)
print(upload_message)

http://public.ndexbio.org/v2/network/edde0831-91fb-11e8-a4bf-0ac135e8bacf
CPU times: user 16min 44s, sys: 1.26 s, total: 16min 46s
Wall time: 16min 52s
