In [1]:
import networkx as nx
import random
import numpy as np
import pandas as pd
from scipy.stats import binom
from statsmodels.stats.multitest import multipletests

In [2]:
#data
G = nx.read_graphml('test.graphml') #graph

#fold-change dict
fold_change = {}
k = list(range(0,5))
for i in G.nodes():
    fold_change[i] = random.choice(k)        

In [33]:
def overlay(graph, fold_change_dict):
    '''Overlaying the fold-change data onto the graph.'''
    threshold = 2 #arbitary for test dict
    for i in graph.nodes():
        
        # Check here
        
        if fold_change_dict[i] > threshold: #increased expression
            graph.add_node(i, change = 1)
        elif fold_change_dict[i] < threshold: #decreased expression
            graph.add_node(i, change = -1)
        else: #no change 
            graph.add_node(i, change = 0)
    return graph

def shortest_path(graph,hyp_node):
    '''Returning the shorstest path of the hype nodes with all the other nodes.'''
    return nx.single_source_shortest_path(graph,hyp_node)

def edge_label_value(graph,path_list):
    '''Return the product of the edges value of the path.'''
    if len(path_list) == 1:
        return 0
    else:
        edge = {'increase' : 1, 'decrease' : -1}
        edge_list = []
        
        for i in range(len(path_list) - 1):
            k = graph.edges[path_list[i], path_list[i+1]] #edge dictionary attribute
            
            # Add check here for relation it should exist
            edge_list.append(edge[k['Relation']])
        return np.prod(edge_list)
    
def node_label_value(graph,path_list):
    '''Returns the product of the starting and end node of the path.'''
    if len(path_list) == 1:
        return 0
    else:
        node_list = []
        for i,j in graph.nodes.data():
            if i == path_list[0] or i == path_list[-1]:
                node_list.append(j['change'])
        return np.prod(node_list)

def p_value(concordance_count,total_nodes,p):
    """Return the p-value with input parametes as no.of concordance nodes,no.of trials
    and probability of achieveing the result."""
    return binom.cdf(concordance_count, total_nodes, p)

def p_val_correction(p):
    '''Uses Benjamini and Hochberg p-value correction.'''
    return multipletests(p, alpha=0.05, method='fdr_bh')

def calculate_concordance(graph,hyp_node):
    if hyp_node not in graph:
        raise ValueError('Node not preset in graph.')
    else:
        concordance_count = 0
        non_concordance_count = 0
        
        path_dict = shortest_path(graph,hyp_node)
        node_num = len(path_dict) - 1 #to remove the node oath with itself.
        
        for i in path_dict:
            path = path_dict[i] #path to travel
            edge_val = edge_label_value(graph,path) #edge product value
            node_val = node_label_value(graph,path) # node product value
            if edge_val == node_val:
                concordance_count += 1
            else:
                non_concordance_count += 1
        p_val = p_value(concordance_count,node_num,0.5)
    return (node_num,concordance_count,non_concordance_count,p_val)

In [34]:
overlay_graph = overlay(G, fold_change)
concordance_dict = {}
for i in overlay_graph.nodes():
    concordance = calculate_concordance(overlay_graph, i)
    concordance_dict[i] = concordance   

In [40]:
concordance_df = pd.DataFrame.from_dict(concordance_dict)
concordance_df = concordance_df.transpose()
concordance_df.columns = ['No_of_Nodes','Concordance','Non-concordance','p-value']
for i in ['No_of_Nodes','Concordance','Non-concordance']:
    concordance_df[i] = pd.to_numeric(concordance_df[i])
concordance_df

Unnamed: 0,No_of_Nodes,Concordance,Non-concordance,p-value
AKT1,1.0,1.0,1.0,1.0
CALM1,11.0,1.0,11.0,0.005859
CALM2,11.0,4.0,8.0,0.274414
CALM3,11.0,4.0,8.0,0.274414
CAMK2A,7.0,3.0,5.0,0.5
CAMK2B,7.0,3.0,5.0,0.5
CAMK2D,7.0,3.0,5.0,0.5
CAMK2G,7.0,3.0,5.0,0.5
CASP1,1.0,2.0,0.0,1.0
CBL,4.0,2.0,3.0,0.6875


In [27]:
p_val = list(concordance_df['p-value'])
corrected_p_val = p_val_correction(p)

[1.0, 0.005859375000000001, 0.2744140625, 0.2744140625, 0.4999999999999999, 0.4999999999999999, 0.4999999999999999, 0.4999999999999999, 1.0, 0.6875, 0.6562499999999999, 0.6562499999999999, 0.875, 0.3437500000000001, 0.14810318429954358, 0.36416624044068147, 0.14810318429954358, 0.36416624044068147, 0.4999999999999999, 0.4999999999999999, 0.6367187499999999, 0.10937500000000001, 0.10937500000000001, 0.7734375000000002, 0.7734375000000002, 0.49999999999999983, 0.010742187499999998, 0.4999999999999999, 0.14810318429954358, 0.4999999999999999, 0.75, 0.3437500000000001, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
