In [1]:
import pandas as pd
import numpy as np
from network_propagation import *
import json
import os
from ndex.networkn import NdexGraph
import ndex
import networkx

### Read in SNP and Gene table

In [2]:
# Load the tab-separated gene-level GWAS summary statistics; the file's first
# column becomes the (temporary) row index.
gene = pd.read_csv(
    './gene_level_summary_stats_pmid_25056061.txt',
    sep='\t',
    index_col=0,
)

In [3]:
# Seed score per gene: negative natural log of the top SNP's p-value
# (np.log is base e, so smaller p-values give larger scores).
gene['log-p'] = np.negative(np.log(gene['TopSNP P-Value']))

In [5]:
# Index the table by gene symbol so rows can be looked up by node name later.
# Guarded so that re-running this cell is a no-op instead of raising KeyError
# once 'Gene' has already been moved from the columns into the index.
if gene.index.name != 'Gene':
    gene = gene.set_index('Gene')

In [6]:
# Preview only the first rows; displaying the whole table bloats the notebook.
gene.head(10)

Gene,Chr,Gene Start,Gene End,nSNPs,TopSNP,TopSNP Pos,TopSNP P-Value,SNP Distance,log-p
HIST1H4K,6,27906930,27907284,8,rs34706883,27913234.0,5.071180e-10,6304.0,21.402277
HIST1H2AK,6,27913636,27914096,16,rs34706883,27913234.0,5.071180e-10,402.0,21.402277
HIST1H2BN,6,27914418,27914867,17,rs34706883,27913234.0,5.071180e-10,1184.0,21.402277
HIST1H2AL,6,27941085,27941555,10,rs13199772,27942064.0,7.053790e-10,979.0,21.072286
HIST1H1B,6,27942548,27943338,10,rs13199772,27942064.0,7.053790e-10,484.0,21.072286
HIST1H3I,6,27947601,27948078,10,rs13199772,27942064.0,7.053790e-10,5537.0,21.072286
HIST1H4L,6,27948904,27949268,10,rs13199772,27942064.0,7.053790e-10,6840.0,21.072286
PGBD1,6,28357342,28378305,30,rs6901575,28358963.0,1.236040e-09,1621.0,20.511353
HIST1H1E,6,26264537,26265322,11,rs3857546,26265741.0,1.458100e-09,1204.0,20.346132
HIST1H2BD,6,26266327,26279555,15,rs3857546,26265741.0,1.458100e-09,586.0,20.346132


### Format input for network propagation

In [7]:
# Download the network from NDEx by UUID.
# Credentials are read from the environment (NDEX_USERNAME / NDEX_PASSWORD)
# instead of being hard-coded in the notebook; a missing variable raises
# KeyError before any request is made.
# NOTE(review): the password that was previously committed in this cell should
# be treated as leaked and rotated.
network = NdexGraph(server='http://ndexbio.org',
                    username=os.environ['NDEX_USERNAME'],
                    password=os.environ['NDEX_PASSWORD'],
                    uuid='f93f402c-86d4-11e7-a10d-0ac135e8bacf')




In [8]:
# Rebuild the downloaded NDEx network as a plain networkx.Graph.
# NOTE(review): presumably this collapses directed/parallel edges into single
# undirected edges — confirm against NdexGraph's base class.
pcnet = networkx.Graph(network)

In [9]:
# Optional: cache the graph as a local pickle so it need not be fetched from NDEx again.
#networkx.write_gpickle(pcnet, './pcnet.pkl')

In [10]:
# Normalized form of the network; passed to random_walk as `graph_norm`.
# NOTE(review): normalize_network comes from the `network_propagation` star
# import — the normalization scheme and return type are not visible here.
norm_pcnet = normalize_network(pcnet)

In [11]:
# Propagation coefficient; passed to random_walk as `alpha`.
# NOTE(review): calculate_alpha comes from the `network_propagation` star
# import — how it derives alpha from the graph is not visible here.
alpha = calculate_alpha(pcnet)

In [12]:
# Preview the first few (node id, attribute dict) pairs rather than dumping
# every node; list(...) makes the slice work whether nodes() returns a list
# or a view object.
list(pcnet.nodes(data=True))[:10]

[(0, {'name': u'UBE2Q1', 'represents': u'hgnc.symbol:UBE2Q1'}),
 (1, {'name': u'RNF14', 'represents': u'hgnc.symbol:RNF14'}),
 (2, {'name': u'UBE2Q2', 'represents': u'hgnc.symbol:UBE2Q2'}),
 (3, {'name': u'RNF10', 'represents': u'hgnc.symbol:RNF10'}),
 (4, {'name': u'RNF11', 'represents': u'hgnc.symbol:RNF11'}),
 (5, {'name': u'RNF13', 'represents': u'hgnc.symbol:RNF13'}),
 (6, {'name': u'REM1', 'represents': u'hgnc.symbol:REM1'}),
 (7, {'name': u'REM2', 'represents': u'hgnc.symbol:REM2'}),
 (8, {'name': u'C16orf13', 'represents': u'hgnc.symbol:C16orf13'}),
 (9, {'name': u'RPEL1', 'represents': u'hgnc.symbol:RPEL1'}),
 (10, {'name': u'CCDC109B', 'represents': u'hgnc.symbol:CCDC109B'}),
 (11, {'name': u'UCHL5', 'represents': u'hgnc.symbol:UCHL5'}),
 (12, {'name': u'RNF17', 'represents': u'hgnc.symbol:RNF17'}),
 (13, {'name': u'NBEAL1', 'represents': u'hgnc.symbol:NBEAL1'}),
 (14, {'name': u'MZT2A', 'represents': u'hgnc.symbol:MZT2A'}),
 (15, {'name': u'MZT2B', 'represents': u'hgnc.symbo

In [13]:
# Map node id -> gene symbol (the 'name' node attribute).
# Uses the explicitly imported `networkx` module; the `nx` alias is never
# imported in this notebook and would only exist if the star import leaked it.
node_name = networkx.get_node_attributes(pcnet, 'name')

In [14]:
# Fix the node-id order once as a real list: it is indexed positionally
# (nd[i]) further down, which a Python 3 dict keys view does not support.
nd = list(node_name)
# One seed weight per node, aligned with `nd`; nodes without a score stay 0.
node_weight = np.zeros(len(nd), dtype=float)

In [15]:
# Fill node_weight[i] with the 'log-p' score of the gene named by node nd[i].
all_name = gene.index.values
# Build a symbol -> score dict once so each node lookup is O(1) instead of a
# linear scan of the gene index. If a symbol appears more than once, the
# first row is used (a chained .loc[...]['log-p'] would return a Series there
# and fail on assignment).
log_p = gene['log-p']
log_p_by_gene = log_p[~log_p.index.duplicated(keep='first')].to_dict()
for i, node_index in enumerate(nd):
    w = log_p_by_gene.get(node_name[node_index])
    # Leave the weight at 0 for genes absent from the table and for
    # non-finite scores (NaN from a missing p-value, inf from p == 0): a
    # single NaN/inf seed would turn every propagated score into NaN.
    # NOTE(review): the float-typed 'TopSNP Pos' column suggests some genes
    # have no top SNP — confirm whether their p-values are missing.
    if w is not None and np.isfinite(w):
        node_weight[i] = w

In [22]:
# Persist the per-node seed weights (np.save appends the '.npy' suffix).
# NOTE(review): 'ndoe_weight' looks like a typo for 'node_weight'; left as is
# because other code may load the file under this name — confirm and rename.
np.save('ndoe_weight', node_weight)

In [23]:
# Persist the normalized network alongside the seed weights
# (written as 'norm_pcnet.npy').
np.save('norm_pcnet', norm_pcnet)

### Network propagation

In [69]:
# Closed-form propagation:
#   F_t = (1 - alpha) * F_0 * (I - alpha * W)^-1
def random_walk(alpha, binary_mat, graph_norm):
    """Propagate seed scores over a network using the closed-form solution.

    Computes ``(1 - alpha) * binary_mat @ inv(I - alpha * graph_norm)``.

    Parameters
    ----------
    alpha : float
        Propagation coefficient multiplying ``graph_norm``.
    binary_mat : numpy.ndarray
        Seed scores F_0. Either a 1-D vector with one entry per node, or a
        2-D array with one row per seed profile and one column per node.
    graph_norm : numpy.ndarray
        Square (n_nodes x n_nodes) network matrix W.

    Returns
    -------
    numpy.ndarray
        Propagated scores with the same shape as ``binary_mat``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``I - alpha * graph_norm`` is singular.
    """
    term1 = (1 - alpha) * binary_mat
    # Size the identity from the network, not from the seeds: for a 2-D seed
    # matrix, binary_mat.shape[0] is the number of profiles, not of nodes.
    n_nodes = np.shape(graph_norm)[0]
    term2 = np.identity(n_nodes) - alpha * graph_norm
    # Solve term2.T @ X.T = term1.T rather than forming inv(term2) explicitly;
    # same result as term1 @ inv(term2), but cheaper and numerically steadier.
    # .T is a no-op on 1-D input, so vectors and matrices share this path.
    graph_prop = np.linalg.solve(term2.T, term1.T).T
    return graph_prop

In [33]:
# Propagate the -log(p) seed weights over the normalized network.
# node_weight is 1-D, so the result holds one propagated score per entry of
# `nd`, in the same order.
prop_weight = random_walk(alpha, node_weight, norm_pcnet)

### Save propagated results

In [39]:
# Write one "<gene name>\t<propagated score>" line per network node, in the
# order of `nd`. The context manager closes the file even if a write fails
# part-way through.
with open('./prop_output.txt', 'w') as outFile:
    for i, node_index in enumerate(nd):
        nn = node_name[node_index]
        outFile.write(nn + '\t' + str(prop_weight[i]) + '\n')