In [51]:
import configparser
import math

import psycopg2
import pandas
from neo4j import GraphDatabase

import hetio.readwrite
import hetio.neo4j

In [52]:
# Load the PostgreSQL password from database.ini, which must provide a
# [psql] section containing a `password` key.
parser = configparser.ConfigParser()

# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means database.ini was not found.
# Fail here with a clear message instead of a later KeyError on 'psql'.
if not parser.read('database.ini'):
    raise FileNotFoundError(
        "Could not read 'database.ini'; it must exist in the working directory "
        "and define a [psql] section with a 'password' entry."
    )

db_password = parser['psql']['password']

In [53]:
epilepsy_id = 'DOID:1826'

# The inner subquery keeps the ten path-count rows with the lowest p-values whose
# source or target node is epilepsy. The outer join then fetches every path-count
# row (one per metapath) stored for those same node pairs, in either orientation.
# The node identifier is supplied as a bound parameter (%(epilepsy_id)s) so the
# database driver handles quoting instead of string formatting.
query = '''SELECT outer_pc.dwpc as dwpc, outer_pc.p_value as p_value, outer_pc.metapath_id as metapath_id,
                  top_ids.source_name as source_name, top_ids.target_name as target_name
            FROM
                (SELECT dwpc, p_value, metapath_id, source_id, target_id, n1.name AS source_name, n2.name AS target_name
                 FROM dj_hetmech_app_pathcount pc
                 JOIN dj_hetmech_app_node join_node
                     ON pc.target_id=join_node.id OR pc.source_id=join_node.id
                 JOIN dj_hetmech_app_node n1
                     ON pc.source_id = n1.id
                 JOIN dj_hetmech_app_node n2
                     ON pc.target_id = n2.id
                 WHERE join_node.identifier = %(epilepsy_id)s
                 ORDER BY pc.p_value ASC LIMIT 10) AS top_ids
            JOIN dj_hetmech_app_pathcount outer_pc
                 ON (top_ids.source_id = outer_pc.source_id AND
                     top_ids.target_id = outer_pc.target_id) OR
                     (top_ids.source_id = outer_pc.target_id AND
                     top_ids.target_id = outer_pc.source_id);
        '''

connection = psycopg2.connect(host = 'hetmech-db-dev.cobepk65dd7j.us-east-1.rds.amazonaws.com',
                              database = 'dj_hetmech', user = 'read_only_user', password = db_password)

# Release the cursor and the connection as soon as the rows are fetched, even if
# the query fails; nothing below needs the database again.
try:
    with connection.cursor() as cursor:
        cursor.execute(query, {'epilepsy_id': epilepsy_id})
        top_metapath_rows = cursor.fetchall()
finally:
    connection.close()

# Column order mirrors the SELECT list of the outer query.
top_metapaths = pandas.DataFrame(top_metapath_rows, columns=['dwpc', 'p_value', 'metapath',
                                                             'source_name', 'target_name'])

print(top_metapaths.head(30))

        dwpc       p_value  metapath    source_name        target_name
0   4.227000  3.782050e-02     CrCtD     Nitrazepam  epilepsy syndrome
1   3.339060  1.257640e-02   CrCbGaD     Nitrazepam  epilepsy syndrome
2   2.678094  1.661597e-02   CbGaDrD     Nitrazepam  epilepsy syndrome
3   3.643829  4.886503e-02     CbGaD     Nitrazepam  epilepsy syndrome
4   3.432431  1.197356e-02   CbGiGaD     Nitrazepam  epilepsy syndrome
5   3.509434  0.000000e+00  CcSEcCtD     Nitrazepam  epilepsy syndrome
6   4.315787  9.593657e-05   CbGbCtD     Nitrazepam  epilepsy syndrome
7   2.382892  8.231739e-02   CbGuAlD     Nitrazepam  epilepsy syndrome
8   4.777316  3.149730e-03   CrCrCtD     Nitrazepam  epilepsy syndrome
9   4.501139  2.762892e-02     CrCtD      Lorazepam  epilepsy syndrome
10  3.679682  2.096138e-03   CrCbGaD      Lorazepam  epilepsy syndrome
11  3.428266  2.737345e-02     CbGaD      Lorazepam  epilepsy syndrome
12  2.672442  3.961942e-02   CbGiGaD      Lorazepam  epilepsy syndrome
13  1.

In [54]:
# Location of the hetionet v1.0 metagraph JSON; the long hexadecimal path segment
# pins the download to one fixed revision of the repository.
url = (
    'https://github.com/dhimmel/hetionet/raw/'
    '76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/'
    'hetnet/json/hetionet-v1.0-metagraph.json'
)

# The metagraph is used later to expand metapath abbreviations such as 'CbGaD'.
metagraph = hetio.readwrite.read_metagraph(url)

In [55]:
def get_paths_for_metapath(metagraph, row, damping_exponent=0.4, driver=None):
    '''
    Return a list of dictionaries containing the information for all paths with a given source, target, and metapath

    Parameters
    ----------
    metagraph : a hetio.hetnet.Metagraph instance to interpret metapath abbreviations
    row : a row from a pandas dataframe with information about the given metapath, source, and target.
        It must provide the keys 'metapath', 'source_name', 'target_name' and 'neg_log_p_value'.
    damping_exponent : value passed to the Cypher query as its `w` parameter (defaults to 0.4)
    driver : an optional, already-open neo4j driver to reuse across calls. When omitted, a driver for
        bolt://neo4j.het.io is opened for this call and closed before returning.

    Returns
    -------
    The list of record dictionaries returned by the query. Each dictionary is extended in place with
    'metapath' (the abbreviation from `row`), 'metapath_importance' (`row['neg_log_p_value']`) and
    'path_importance' (metapath_importance multiplied by the record's 'percent_of_DWPC').
    '''
    metapath_data = metagraph.metapath_from_abbrev(row['metapath'])

    query = hetio.neo4j.construct_pdp_query(metapath_data, path_style='string', property='name')

    params = {
        'source': row['source_name'],
        'target': row['target_name'],
        'w': damping_exponent
    }

    # Only close a driver that this call created; a caller-supplied driver
    # stays open so it can be shared between many calls.
    owns_driver = driver is None
    if owns_driver:
        driver = GraphDatabase.driver("bolt://neo4j.het.io")

    try:
        with driver.session() as session:
            # Materialise the records while the session is still open.
            metapath_result = session.run(query, params).data()
    finally:
        if owns_driver:
            driver.close()

    for path in metapath_result:
        path['metapath'] = row['metapath']
        path['metapath_importance'] = row['neg_log_p_value']
        path['path_importance'] = path['metapath_importance'] * path['percent_of_DWPC']

    return metapath_result

In [56]:
# Discard every row that contains a missing value.
top_metapaths = top_metapaths.dropna()

# Rows with a p-value of exactly zero come from extremely small p-values, or from values that fall
# outside the support of the distribution the p-values were calculated from. For this notebook they
# are replaced with the smallest non-zero p-value present; a proper fix is tracked at
# https://github.com/greenelab/hetmech/issues/153
has_zero_p_value = top_metapaths.p_value == 0
min_p_value = top_metapaths.loc[~has_zero_p_value, 'p_value'].min()

top_metapaths.loc[has_zero_p_value, 'p_value'] = min_p_value
print(top_metapaths.p_value.min())

# -log10(p): larger values correspond to smaller p-values.
top_metapaths['neg_log_p_value'] = -top_metapaths.p_value.apply(math.log10)

1.11022302462516e-16


In [57]:
# Build one DataFrame of paths per source node: gather the paths of every metapath row
# belonging to that source, index them by their path string, and rank them by importance.
all_group_dataframes = []
for source_name, source_rows in top_metapaths.groupby('source_name'):
    source_paths = [
        path
        for row_label, metapath_row in source_rows.iterrows()
        for path in get_paths_for_metapath(metagraph, metapath_row)
    ]
    ranked_paths = (
        pandas.DataFrame(source_paths)
        .set_index('path')
        .sort_values(by=['path_importance'], ascending=False)
    )

    all_group_dataframes.append(ranked_paths)

In [58]:
# Ranked paths for the first source group (Alprazolam in the recorded output), highest path_importance first.
all_group_dataframes[0]

Unnamed: 0_level_0,PDP,metapath,metapath_importance,path_importance,percent_of_DWPC
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alprazolam–STXBP1–epilepsy syndrome,0.013801,CuGaD,1.363860,136.385966,100.000000
Alprazolam–KDM5A–Vigabatrin–epilepsy syndrome,0.008568,CuGuCtD,1.598265,79.913244,50.000000
Alprazolam–KDM5A–Zonisamide–epilepsy syndrome,0.008568,CuGuCtD,1.598265,79.913244,50.000000
Alprazolam–panic disorder–Diazepam–epilepsy syndrome,0.015696,CpDpCtD,1.210346,75.370555,62.271902
Alprazolam–Benzodiazepines–Midazolam–epilepsy syndrome,0.033413,CiPCiCtD,2.528904,67.296417,26.610902
Alprazolam–Benzodiazepines–Diazepam–epilepsy syndrome,0.033413,CiPCiCtD,2.528904,67.296417,26.610902
Alprazolam–Benzodiazepines–Clonazepam–epilepsy syndrome,0.033413,CiPCiCtD,2.528904,67.296417,26.610902
Alprazolam–Clonazepam–epilepsy syndrome,0.021358,CrCtD,1.908089,51.308259,26.889871
Alprazolam–Benzodiazepines–Clobazam–epilepsy syndrome,0.025323,CiPCiCtD,2.528904,51.001147,20.167293
Alprazolam–Midazolam–epilepsy syndrome,0.020375,CrCtD,1.908089,48.947021,25.652382


In [59]:
# Ranked paths for the second source group (Bromazepam in the recorded output), highest path_importance first.
all_group_dataframes[1]

Unnamed: 0_level_0,PDP,metapath,metapath_importance,path_importance,percent_of_DWPC
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bromazepam–Clonazepam–epilepsy syndrome,0.033690,CrCtD,1.918142,103.667512,54.045786
Bromazepam–Diazepam–epilepsy syndrome,0.028646,CrCtD,1.918142,88.146723,45.954214
Bromazepam–Emotional disturbance NOS–Primidone–epilepsy syndrome,0.001516,CcSEcCtD,15.954590,33.546973,2.102653
Bromazepam–Alertness decreased–Midazolam–epilepsy syndrome,0.001317,CcSEcCtD,15.954590,29.143504,1.826653
Bromazepam–GABRD–epilepsy syndrome,0.007174,CbGaD,1.204977,28.142576,23.355287
Bromazepam–Alertness decreased–Diazepam–epilepsy syndrome,0.001204,CcSEcCtD,15.954590,26.654943,1.670676
Bromazepam–Emotional disturbance NOS–Phenobarbital–epilepsy syndrome,0.001181,CcSEcCtD,15.954590,26.135248,1.638102
Bromazepam–Anterograde amnesia–Clobazam–epilepsy syndrome,0.001133,CcSEcCtD,15.954590,25.063401,1.570921
Bromazepam–Emotional disorder–Primidone–epilepsy syndrome,0.001007,CcSEcCtD,15.954590,22.293299,1.397297
Bromazepam–GABRR1–P2RX2–epilepsy syndrome,0.002118,CbGiGaD,1.904431,21.835449,11.465603


In [60]:
# Ranked paths for the third source group (Ezogabine in the recorded output), highest path_importance first.
all_group_dataframes[2]

Unnamed: 0_level_0,PDP,metapath,metapath_importance,path_importance,percent_of_DWPC
path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ezogabine–Flupirtine–ADRA2A–epilepsy syndrome,0.008557,CrCbGaD,1.689371,168.937144,100.000000
Ezogabine–KCNQ4–epilepsy syndrome,0.023376,CbGaD,2.068733,82.232366,39.750106
Ezogabine–NAT2–Clonazepam–epilepsy syndrome,0.010879,CbGbCtD,2.422064,62.667973,25.873785
Ezogabine–KCNQ3–epilepsy syndrome,0.017715,CbGaD,2.068733,62.320480,30.124947
Ezogabine–KCNQ2–epilepsy syndrome,0.017715,CbGaD,2.068733,62.320480,30.124947
Ezogabine–KCNQ5–KCNQ3–epilepsy syndrome,0.005575,CbGiGaD,2.358977,56.851101,24.099895
Ezogabine–KCNQ4–KCNQ3–epilepsy syndrome,0.005242,CbGiGaD,2.358977,53.451532,22.658776
Ezogabine–UGT1A4–Lamotrigine–epilepsy syndrome,0.007259,CbGbCtD,2.422064,41.813120,17.263422
Ezogabine–KCNQ2–KCNQ3–epilepsy syndrome,0.003972,CbGiGaD,2.358977,40.508687,17.172141
Ezogabine–KCNQ3–KCNQ4–epilepsy syndrome,0.003972,CbGiGaD,2.358977,40.508687,17.172141
