In [1]:
import configparser
import pandas
import psycopg2

import hetio.readwrite
import hetio.pathtools

In [2]:
# Load the mapped treatment table (tab-separated).
treatment = pandas.read_csv('data/HumCircMed2018v2_mapped.tsv', sep='\t')

# Keep a row only when all of the following hold:
#   * the drug cell (column position 2) is not missing,
#   * the drug cell names a single drug (no comma-separated list),
#   * the disease cell (column position 9) is not missing.
# Missing cells are detected through their string form 'nan'.
drug_text = [str(value) for value in treatment.iloc[:, 2]]
disease_text = [str(value) for value in treatment.iloc[:, 9]]
filter_rids = [
    row_pos
    for row_pos, (drug, disease) in enumerate(zip(drug_text, disease_text))
    if drug != 'nan' and ',' not in drug and disease != 'nan'
]
treatment_filter = treatment.iloc[filter_rids, :]

In [3]:
# Load the database password from the [psql] section of database.ini.
# The file is opened explicitly because ConfigParser.read() silently skips
# files it cannot open, which would only surface afterwards as an unrelated
# KeyError on 'psql'. Opening it here fails immediately with a clear
# FileNotFoundError instead.
parser = configparser.ConfigParser()
with open('database.ini') as config_file:
    parser.read_file(config_file)

db_password = parser['psql']['password']

In [4]:
# Read Hetionet v1.0 as a graph and its metagraph; both URLs point at the
# same commit of the dhimmel/hetionet repository.
graph_url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
metagraph_url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0-metagraph.json'

graph = hetio.readwrite.read_graph(graph_url)
metagraph = hetio.readwrite.read_metagraph(metagraph_url)

In [5]:
# Connect to the hetmech database with the password loaded from database.ini.
connection = psycopg2.connect(host = 'hetmech-db-dev.cobepk65dd7j.us-east-1.rds.amazonaws.com', 
                              database = 'dj_hetmech', user = 'read_only_user', password = db_password)

# Both queries use %s placeholders so that psycopg2 quotes the values itself;
# identifiers read from the TSV are never pasted into the SQL text.
id_query = '''
    SELECT dj_hetmech_app_node.id, dj_hetmech_app_node.identifier
    FROM dj_hetmech_app_node 
    WHERE dj_hetmech_app_node.identifier=%s OR dj_hetmech_app_node.identifier=%s
    ORDER BY dj_hetmech_app_node.identifier;
    '''
pair_query = '''
        SELECT *  
        FROM dj_hetmech_app_pathcount dhap 
        WHERE (dhap.source_id=%s AND dhap.target_id=%s);
        '''

# Extract the hetionet paths that connect each query compound to its treated
# disease. Every element of edge_info_list is one tab-separated line:
#   effect, half-life, metapath abbreviation, then the nodes along the path.
edge_info_list = []
try:
    for row in treatment_filter.itertuples(index=False, name=None):
        # compound ~ treatment info, addressed by column position
        i_drug = row[2]
        i_disease = row[9]
        i_halflife = str(row[7])
        i_effect = row[13]

        # Look up the database node ids of the compound and the disease.
        drug_key, disease_key = str(i_drug), str(i_disease)
        pair_id = pandas.read_sql(id_query, connection, params=(drug_key, disease_key))

        # Proceed only when exactly two nodes matched and both identifiers are
        # among them. The ids are resolved by identifier rather than by row
        # position, so the result does not depend on how the two identifiers
        # happen to sort.
        node_ids = dict(zip(pair_id['identifier'], pair_id['id']))
        if pair_id.shape[0] != 2 or drug_key not in node_ids or disease_key not in node_ids:
            continue

        # Passing the ids as strings makes psycopg2 emit them as quoted
        # literals, the same form the query used before parameterization.
        i_drug_id = str(node_ids[drug_key])
        i_disease_id = str(node_ids[disease_key])
        pair_metapaths = pandas.read_sql(pair_query, connection, params=(i_drug_id, i_disease_id))

        # Endpoints in the (metanode, identifier) form passed to paths_between.
        source_id = 'Compound', i_drug
        target_id = 'Disease', i_disease

        # Column position 4 of the pathcount rows holds the metapath abbreviation.
        for j_meta in pair_metapaths.iloc[:, 4]:
            # Gene-related metapaths only (abbreviation contains 'G').
            if 'G' not in j_meta:
                continue

            metapath = metagraph.metapath_from_abbrev(j_meta)
            j_paths = hetio.pathtools.paths_between(graph = graph, source =source_id, target = target_id, metapath = metapath)

            # The leading fields are identical for every path of this metapath.
            jp_prefix = '\t'.join(str(ji) for ji in [i_effect, i_halflife, j_meta])
            for j_path in j_paths:
                jp_nodes = j_path.get_nodes()
                jp_char = jp_prefix + '\t' + '\t'.join(str(jn) for jn in list(jp_nodes))
                edge_info_list.append(jp_char)
finally:
    # Release the database connection even if a query or path search fails.
    connection.close()

In [6]:
# Write the collected edge lines, one per row, to the output TSV.
# The with-block guarantees the file is flushed and closed even if a write fails.
with open('data/HumCircMed2018v2_mapped_hetionet_edges.tsv','w') as f:
    for edge_line in edge_info_list:
        f.write('%s\n' % edge_line)