# Extract edges from hetionet connecting drugs to diseases

In [26]:
import configparser
import numpy as np
import pandas as pd
import psycopg2

import hetio.readwrite
import hetio.pathtools

In [52]:
# read in json format of hetionetv1.0
graph_url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
graph = hetio.readwrite.read_graph(graph_url)
meta_url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0-metagraph.json'
metagraph = hetio.readwrite.read_metagraph(meta_url)

### pre-process shorter-half-life drugs extracted from DrugBank

In [223]:
# read in half-life info of drugs
half_life_df = pd.read_csv('https://github.com/dhimmel/drugbank/raw/6b9ae386d6ba4a0eca2d66d4b0337a6e90fe81f4/data/drugbank_subset_halflife_curated.tsv'
                           ,header = 0, sep = '\t')

# half-life < 24 hours
short24_drugs = list(half_life_df[half_life_df.half_life_hours_curated < 24].drugbank_id)
short24_hl = list(half_life_df[half_life_df.half_life_hours_curated < 24].half_life_hours_curated)

In [224]:
# Get the diseases that each drug treats using hetionet
db_pair_drug = []
db_pair_disease = []
db_pair_hl = []
metapath = metagraph.metapath_from_abbrev('CtD')
for i in range(0, len(short24_drugs)):
    drug_id = short24_drugs[i]
    source_id = 'Compound', drug_id
    # whether drug is in hetionet
    if source_id in graph.node_dict:
        treat_paths = hetio.pathtools.paths_from(graph = graph, source = source_id, metapath = metapath)
        treat_len = len(treat_paths)
        if treat_len > 0:
            for j in range(0,treat_len):
                db_pair_drug.append(drug_id)
                db_pair_hl.append(short24_hl[i])
                disease_id = str(treat_paths[j].get_nodes()[1]).split('Disease::')[1]
                db_pair_disease.append(disease_id)

### pre-process circadian treatments extracted from Ruben et al

In [234]:
# treatment data 
treatment = pandas.read_csv('data/HumCircMed2018v2_mapped.tsv', sep = '\t')

rb_pair_drug = []
rb_pair_disease = []
rb_pair_hl = []
# filter the treatment data
for i in range(0,len(treatment)):
    drug_id = treatment['drug.trtmnt.DrugBankID'][i]
    if drug_id != 'nan':
        # whether treatment contains only one drug
        if len(drug_id.split(',')) == 1:
            source_id = 'Compound', drug_id
            # whether drug is in hetionet
            if source_id in graph.node_dict:
                disease_id = treatment['therapeutic.area.DOID'][i]
                if disease_id != 'nan':
                    rb_pair_drug.append(drug_id)
                    rb_pair_disease.append(disease_id)
                    rb_pair_hl.append(treatment['halflife.hrs'][i])

AttributeError: 'float' object has no attribute 'split'

In [250]:
drug_id == 

nan

In [135]:
## define metapath types 
# compound -> gene -> disease
g_type = ['CbGaD','CbGdD','CbGuD',
          'CdGaD','CdGdD','CdGuD',
          'CuGaD','CuGdD','CuGuD']
# compound -> gene -> compound -> disease
gc_type = ['CbGbCpD','CbGbCtD','CbGdCpD','CbGdCtD','CbGuCpD','CbGuCtD',
           'CdGbCpD','CdGbCtD','CdGdCpD','CdGdCtD','CdGuCpD','CdGuCtD',
           'CuGbCpD','CuGbCtD','CuGdCpD','CuGdCtD','CuGuCpD','CuGuCtD']
# compound -> gene -> disease -> disease
gd_type = ['CbGaDrD','CbGdDrD','CbGuDrD',
           'CdGaDrD','CdGdDrD','CdGuDrD',
           'CuGaDrD','CuGdDrD','CuGuDrD']
# compound -> compound -> gene -> disease
cg_type = ['CrCbGaD','CrCbGdD','CrCbGuD',
           'CrCdGaD','CrCdGdD','CrCdGuD',
           'CrCuGaD','CrCuGdD','CrCuGuD']
# compound -> disease -> gene -> disease
dg_type = ['CpDaGaD','CpDaGdD','CpDaGuD',
           'CpDdGaD','CpDdGdD','CpDdGuD',
           'CpDuGaD','CpDuGdD','CpDuGuD',
           'CtDaGaD','CtDaGdD','CtDaGuD',
           'CtDdGaD','CtDdGdD','CtDdGuD',
           'CtDuGaD','CtDuGdD','CtDuGuD']
types = [gc_type, gd_type, cg_type, dg_type]

In [3]:
parser = configparser.ConfigParser()
parser.read('database.ini')
db_password = parser['psql']['password']

In [5]:
connection = psycopg2.connect(host = 'hetmech-db-dev.cobepk65dd7j.us-east-1.rds.amazonaws.com', 
                              database = 'dj_hetmech', user = 'read_only_user', password = db_password)

# extract hetionet edges that connect query compound to treatment
edge_info_list = []
for i in range(0,treatment_filter.shape[0]):    
    # compound ~ treatment info
    i_drug = treatment_filter.iloc[i,2]
    i_disease = treatment_filter.iloc[i,9]
    i_halflife = str(treatment_filter.iloc[i,7])
    i_effect = treatment_filter.iloc[i,13]
    
    # extract hetionet node IDs of compound and treatment
    id_query = f'''
    SELECT dj_hetmech_app_node.id, dj_hetmech_app_node.identifier
    FROM dj_hetmech_app_node 
    WHERE dj_hetmech_app_node.identifier='{i_drug}' OR dj_hetmech_app_node.identifier='{i_disease}'
    ORDER BY dj_hetmech_app_node.identifier;
    '''
    pair_id = pandas.read_sql(id_query, connection)

    # both compound and treatment are in hetionet
    if pair_id.shape[0] == 2:      
        # extract metatype of edges
        i_drug_id = pair_id.iloc[0,0]
        i_disease_id = pair_id.iloc[1,0]    
        pair_query = f'''
        SELECT *  
        FROM dj_hetmech_app_pathcount dhap 
        WHERE (dhap.source_id='{i_drug_id}' AND dhap.target_id='{i_disease_id}');
        '''
        pair_metapaths = pandas.read_sql(pair_query, connection)

        #
        if pair_metapaths.shape[0] > 0:
            #
            for j in range(0,pair_metapaths.shape[0]):
                j_meta = pair_metapaths.iloc[j,4]
                # Gene-related metapaths
                if 'G' in j_meta:
                    #
                    source_id = 'Compound', i_drug  
                    target_id = 'Disease', i_disease 
                    metapath = metagraph.metapath_from_abbrev(j_meta)
                    j_paths = hetio.pathtools.paths_between(graph = graph, source =source_id, target = target_id, metapath = metapath)

                    # output edges info
                    for k in range(0,len(j_paths)):
                        jp_nodes = j_paths[k].get_nodes()
                        jp_info = [i_effect, i_halflife, j_meta]
                        jp_char = '\t'.join(str(ji) for ji in jp_info) + '\t' + '\t'.join(str(jn) for jn in list(jp_nodes))
                        edge_info_list.append(jp_char)

In [6]:
# output
f = open('data/HumCircMed2018v2_mapped_hetionet_edges.tsv','w')
for i in range(0,len(edge_info_list)):
    f.write('%s\n' % edge_info_list[i])
f.close()