In [79]:
import configparser
import math

import psycopg2
import pandas
from neo4j import GraphDatabase

import hetio.readwrite
import hetio.neo4j

In [80]:
# Get epilsepsy disease ontology ID
epilepsy_id = 'DOID:1826'


# Query all the metapaths for each compound, epilepsy pair
# Get the individual paths for each compound, epilepsy pair
# Use the path global ranking for each compound


In [81]:
# I created a database.ini file to store the database password
parser = configparser.ConfigParser()
parser.read('database.ini')

db_password = parser['psql']['password']

In [82]:
# Get top ten most important metapaths for epilepsy (which are all compound-disease pairs)
query = '''SELECT outer_pc.dwpc as dwpc, outer_pc.p_value as p_value, outer_pc.metapath_id as metapath_id, 
                  top_ids.source_name as source_name, top_ids.target_name as target_name 
            FROM 
                (SELECT dwpc, p_value, metapath_id, source_id, target_id, n1.name AS source_name, n2.name AS target_name 
                 FROM dj_hetmech_app_pathcount pc 
                 JOIN dj_hetmech_app_node join_node  
                     ON pc.target_id=join_node.id OR pc.source_id=join_node.id 
                 JOIN dj_hetmech_app_node n1 
                     ON pc.source_id = n1.id 
                 JOIN dj_hetmech_app_node n2 
                     ON pc.target_id = n2.id     
                 WHERE join_node.identifier='{epilepsy_id}' 
                 ORDER BY pc.p_value ASC LIMIT 10) AS top_ids 
            JOIN dj_hetmech_app_pathcount outer_pc 
                 ON (top_ids.source_id = outer_pc.source_id AND 
                     top_ids.target_id = outer_pc.target_id) OR 
                     (top_ids.source_id = outer_pc.target_id AND 
                     top_ids.target_id = outer_pc.source_id);
        '''.format(epilepsy_id=epilepsy_id)

connection = psycopg2.connect(host = 'hetmech-db-dev.cobepk65dd7j.us-east-1.rds.amazonaws.com', 
                              database = 'dj_hetmech', user = 'read_only_user', password = db_password)

cursor = connection.cursor()

cursor.execute(query)
top_metapaths = cursor.fetchall()

top_metapaths = pandas.DataFrame(top_metapaths, columns=['dwpc', 'p_value', 'metapath', 
                                              'source_name', 'target_name'])

print(top_metapaths)

        dwpc       p_value  metapath            source_name        target_name
0   4.227000  3.782050e-02     CrCtD             Nitrazepam  epilepsy syndrome
1   3.339060  1.257640e-02   CrCbGaD             Nitrazepam  epilepsy syndrome
2   2.678094  1.661597e-02   CbGaDrD             Nitrazepam  epilepsy syndrome
3   3.643829  4.886503e-02     CbGaD             Nitrazepam  epilepsy syndrome
4   3.432431  1.197356e-02   CbGiGaD             Nitrazepam  epilepsy syndrome
5   3.509434  0.000000e+00  CcSEcCtD             Nitrazepam  epilepsy syndrome
6   4.315787  9.593657e-05   CbGbCtD             Nitrazepam  epilepsy syndrome
7   2.382892  8.231739e-02   CbGuAlD             Nitrazepam  epilepsy syndrome
8   4.777316  3.149730e-03   CrCrCtD             Nitrazepam  epilepsy syndrome
9   4.501139  2.762892e-02     CrCtD              Lorazepam  epilepsy syndrome
10  3.679682  2.096138e-03   CrCbGaD              Lorazepam  epilepsy syndrome
11  3.428266  2.737345e-02     CbGaD              Lo

In [83]:
url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0-metagraph.json'

metagraph = hetio.readwrite.read_metagraph(url)

In [97]:
def get_paths_for_metapath(metagraph, metapath_name, source, target):
    damping_exponent = .4

    metapath_data = metagraph.metapath_from_abbrev(metapath_name)

    query = hetio.neo4j.construct_pdp_query(metapath_data, path_style='list', property='name')

    driver = GraphDatabase.driver("bolt://neo4j.het.io")
    params = {
        'source': source,
        'target': target,
        'w': damping_exponent
    }
    with driver.session() as session:
        metapath_result = session.run(query, params)
        metapath_result = metapath_result.data()

    return metapath_result

In [99]:
# Split dataframe by source_name, and combine path importance rankings for each one like in book 9

top_metapaths = top_metapaths.dropna()

# Columns with a zero p-value are caused by extremely low p-values or are outside of the support
# of the distribution we were calculating p-values from
top_metapaths = top_metapaths[top_metapaths.p_value != 0]

top_metapaths['neg_log_p_value'] = top_metapaths.p_value.apply(lambda x: -math.log10(x))

for row in top_metapaths.rows:
    print(row)

# maybe result_type='expand'
df = top_metapaths.apply(func=lambda row: get_paths_for_metapath(metagraph, row.metapath, 
                                                            row.source_name, row.target_name), axis=1)

AttributeError: 'DataFrame' object has no attribute 'rows'

In [96]:
#for name, group in top_metapaths.groupby('source_name'):
print(df)

0             PDP                                   ...
1              PDP                                  ...
2             PDP                                   ...
3             PDP                                   ...
4              PDP                                  ...
6               PDP                                 ...
7              PDP                                  ...
8              PDP                                  ...
9             PDP                                   ...
10              PDP                                 ...
11            PDP                                   ...
12            PDP                                   ...
13            PDP                                   ...
14            PDP                                   ...
15               PDP                                ...
16            PDP                                   ...
17             PDP                                  ...
18            PDP                               