In [4]:
import requests
def query_single_edge(_input, _output, _value):
    """
        Retrieve results using one API call from input to output.
        :param _input: the input prefix, for a list of input prefix in BioThings Explorer, visit: http://biothings.io/explorer/api/v2/metadata/bioentities
        :param _output: the output prefix, for a list of output prefix in BioThings Explorer, visit: http://biothings.io/explorer/api/v2/metadata/bioentities
        :_value: The value of the input
        :return:
    """
    doc = requests.get('http://biothings.io/explorer/api/v2/directinput2output?input_prefix={{input}}&output_prefix={{output}}&input_value={{value}}&format=translator'.
                      replace("{{input}}", _input).replace("{{output}}", _output).replace("{{value}}", _value)).json()
    return doc

## What are the defining symptoms / phenotypes of [condition x]?

Input Type: OMIM Disease ID/DOID/MONDO ID

Input Value: 603903(OMIM DISEASE ID)

Expected Output Type: Human Phenotype Ontology ID

In [7]:
# call BioThings Explorer to retrieve phenotype associated with OMIM:603903
results = query_single_edge(_input='omim.disease', 
                            _output='hp', 
                            _value='603903')

In [10]:
# display example output for node
results['result_list']['node_list'][0]

{'description': None,
 'id': 'HP:0002204',
 'name': 'Pulmonary embolism',
 'type': 'phenotype'}

In [11]:
# display example output for edge
results['result_list']['edge_list'][0]

{'evidence_type': None,
 'provided_by': 'hpoa.ttl',
 'publications': None,
 'qualifier': {'category': None, 'probability': None, 'pvalue': None},
 'relation': 'has phenotype',
 'source_id': 'OMIM.DISEASE:603903',
 'target_id': 'HP:0002204'}

In [12]:
# fetch all Human Phenotype Ontology IDs for the next step
phenotypes = [_doc['id'] for _doc in results['result_list']['node_list']]

In [13]:
# display the first 10 phenotypes
phenotypes[:10]

['HP:0002204',
 'HP:0000823',
 'HP:0000980',
 'HP:0001941',
 'HP:0004825',
 'HP:0000097',
 'HP:0007917',
 'HP:0003155',
 'HP:0001712',
 'HP:0002098']

## What conditions present [symptoms]?

Input Type: Human Phenotype Ontology ID

Input Value: ['HP:0002204', 'HP:0000823', 'HP:0000980', 'HP:0001941', 'HP:0004825', 'HP:0000097', 'HP:0007917', 'HP:0003155', 'HP:0001712', 'HP:0002098']

Expected Output: OMIM Disease ID

In [15]:
similar_conditions = []
# loop through the first 100 human phenotype ontology IDs got from CELL 12
for _phenotype in phenotypes[:100]:
    results = query_single_edge(_input='hp', 
                                _output='omim.disease', 
                                _value=_phenotype.split(':')[-1])
    if results and 'result_list' in results:
        similar_conditions += results['result_list']['node_list']

In [17]:
# extract the unique conditions from results
similar_conditions = set([_doc['id'] for _doc in similar_conditions])

In [18]:
# display the first 10 conditions
list(similar_conditions)[:10]

['OMIM.DISEASE:112200',
 'OMIM.DISEASE:615988',
 'OMIM.DISEASE:615515',
 'OMIM.DISEASE:149400',
 'OMIM.DISEASE:266200',
 'OMIM.DISEASE:239199',
 'OMIM.DISEASE:613576',
 'OMIM.DISEASE:261000',
 'OMIM.DISEASE:615415',
 'OMIM.DISEASE:604498']

## Module 1: What genes are implicated in [condition]?

Input Type: OMIM disease ID

Input Value: ['OMIM.DISEASE:612336', 'OMIM.DISEASE:614514', 'OMIM.DISEASE:176860', 'OMIM.DISEASE:612304', 'OMIM.DISEASE:613485', 'OMIM.DISEASE:134400', 'OMIM.DISEASE:613118', 'OMIM.DISEASE:188050', 'OMIM.DISEASE:615961', 'OMIM.DISEASE:616834']

Expected Output Type: ncbigene ID

In [19]:
implicated_genes = []
# loop through the first 100 similar conditions got from CELL 15
for _condition in list(similar_conditions)[:100]:
    results = query_single_edge(_input='omim.disease', 
                                _output='ncbigene', 
                                _value=_condition.split(':')[-1])
    if results and 'result_list' in results:
        implicated_genes += results['result_list']['node_list']

In [21]:
# extract the unique genes from results
implicated_genes = set([_doc['id'] for _doc in implicated_genes])

In [22]:
# display the first 10 conditions
list(implicated_genes)[:10]

['NCBIGENE:326',
 'NCBIGENE:2176',
 'NCBIGENE:2833',
 'NCBIGENE:1471',
 'NCBIGENE:5313',
 'NCBIGENE:5972',
 'NCBIGENE:7439',
 'NCBIGENE:2066',
 'NCBIGENE:2263',
 'NCBIGENE:5836']

## Module 1: What pathways/processes are [genes] involved in?

Input Type: ncbigene ID

Input Value: ['NCBIGENE:3026',
 'NCBIGENE:462',
 'NCBIGENE:4615',
 'NCBIGENE:4247',
 'NCBIGENE:5373',
 'NCBIGENE:7056',
 'NCBIGENE:3273',
 'NCBIGENE:2147',
 'NCBIGENE:29929',
 'NCBIGENE:2243']

Expected Output Type: Reactome Pathway ID

In [25]:
involved_pathways = []
# loop through the first 50 implicated genes got from CELL 15
for _gene in list(implicated_genes)[:50]:
    results = query_single_edge(_input='ncbigene', 
                                _output='reactome.pathway', 
                                _value=_gene.split(':')[-1])
    if results and 'result_list' in results:
        involved_pathways += [_doc['id'] for _doc in results['result_list']['node_list'] if _doc['id'].startswith('REACT')]

In [26]:
len(set(involved_pathways))

874

In [27]:
# display the first 10 pathways
list(set(involved_pathways))[:10]

['REACTOME.PATHWAY:REACT:R-HSA-190377',
 'REACTOME.PATHWAY:R-HSA-5668914',
 'REACTOME.PATHWAY:KEGG-path:maphsa04062',
 'REACTOME.PATHWAY:R-HSA-674695',
 'REACTOME.PATHWAY:R-HSA-180910',
 'REACTOME.PATHWAY:KEGG-path:maphsa05414',
 'REACTOME.PATHWAY:R-HSA-1852241',
 'REACTOME.PATHWAY:REACT:R-HSA-983705',
 'REACTOME.PATHWAY:REACT:R-HSA-4570464',
 'REACTOME.PATHWAY:R-HSA-446728']

## Module 1: What genes are involved in [pathway/process]?

Input Type: Reactome Pathway ID

Input Value: ['REACTOME.PATHWAY:R-HSA-372790',
 'REACTOME.PATHWAY:R-HSA-392499',
 'REACTOME.PATHWAY:R-HSA-456926',
 'REACTOME.PATHWAY:R-HSA-140877',
 'REACTOME.PATHWAY:R-HSA-1280218',
 'REACTOME.PATHWAY:R-HSA-193704',
 'REACTOME.PATHWAY:R-HSA-76005',
 'REACTOME.PATHWAY:R-HSA-168188',
 'REACTOME.PATHWAY:R-HSA-114608',
 'REACTOME.PATHWAY:R-HSA-159854']

Expected Output Type: ncbigene ID

In [28]:
involved_genes = []
for _pathway in list(set(involved_pathways))[:50]:
    results = query_single_edge(_input='reactome.pathway', 
                                _output='ncbigene', 
                                _value=_pathway.split(':')[-1])
    if results and 'result_list' in results:
        involved_genes += [_doc['id'] for _doc in results['result_list']['node_list']]

In [29]:
len(set(involved_genes))

1768

In [30]:
# display the last 10 genes
list(set(involved_genes))[-10:]

['NCBIGENE:8740',
 'NCBIGENE:10699',
 'NCBIGENE:23169',
 'NCBIGENE:3141',
 'NCBIGENE:1759',
 'NCBIGENE:57161',
 'NCBIGENE:8148',
 'NCBIGENE:57171',
 'NCBIGENE:5699',
 'NCBIGENE:4759']

## Module 1: What drugs/compounds target gene products of [gene]?

Input Type: ncbigene ID

Input Value: ['NCBIGENE:10161',
 'NCBIGENE:55701',
 'NCBIGENE:89790',
 'NCBIGENE:23370',
 'NCBIGENE:81168',
 'NCBIGENE:120775',
 'NCBIGENE:5190',
 'NCBIGENE:27237',
 'NCBIGENE:974',
 'NCBIGENE:3805']

Expected Output Type: ChEMBL Compound ID

In [31]:
involved_gene_symbols = []
for _gene in list(set(involved_genes))[-100:]:
    results = query_single_edge(_input='ncbigene', 
                                _output='hgnc.symbol', 
                                _value=_gene.split(':')[-1])
    if results and 'result_list' in results:
        involved_gene_symbols += [_doc['id'] for _doc in results['result_list']['node_list']]

In [32]:
involved_gene_symbols[:10]

['HGNC.SYMBOL:HIBADH',
 'HGNC.SYMBOL:CDC40',
 'HGNC.SYMBOL:TTC8',
 'HGNC.SYMBOL:RFT1',
 'HGNC.SYMBOL:KCNK4',
 'HGNC.SYMBOL:ARF1',
 'HGNC.SYMBOL:NXF2',
 'HGNC.SYMBOL:TCTEX1D1',
 'HGNC.SYMBOL:GCA',
 'HGNC.SYMBOL:MALT1']

In [33]:
drugs = []
for _gene in set(involved_gene_symbols):
    results = query_single_edge(_input='hgnc.symbol', 
                                _output='chembl.compound', 
                                _value=_gene.split(':')[-1])
    if results and 'result_list' in results:
        drugs += [_doc['id'] for _doc in results['result_list']['node_list']]

In [34]:
drugs[:10]

['CHEMBL.COMPOUND:CHEMBL14762',
 'CHEMBL.COMPOUND:CHEMBL488436',
 'CHEMBL.COMPOUND:CHEMBL1230607',
 'CHEMBL.COMPOUND:CHEMBL1944698',
 'CHEMBL.COMPOUND:CHEMBL3544942',
 'CHEMBL.COMPOUND:CHEMBL445813',
 'CHEMBL.COMPOUND:CHEMBL1802728',
 'CHEMBL.COMPOUND:CHEMBL448',
 'CHEMBL.COMPOUND:CHEMBL564829',
 'CHEMBL.COMPOUND:CHEMBL3545083']

## Annotations Path

### What are the common side effects of [drug]?
Input Type: chembl compound ID

Exprected Output Type: UMLS Disease ID

In [43]:
def fetch_drug_common_side_effects(chembl_id, threshold=20):
    side_effects = query_single_edge(_input='chembl.compound', 
                                     _output='meddra',
                                     _value=chembl_id.split(':')[-1])
    if side_effects and 'result_list' in side_effects:
        return [_doc['target_id'] for _doc in side_effects['result_list']['edge_list'] if float(_doc['qualifier']['probability']) > threshold]
    else:
        return []

In [44]:
%pdb
fetch_drug_common_side_effects('CHEMBL.COMPOUND:CHEMBL744')

Automatic pdb calling has been turned OFF


['MEDDRA:10001551',
 'MEDDRA:10002026',
 'MEDDRA:10010264',
 'MEDDRA:10011224',
 'MEDDRA:10011906',
 'MEDDRA:10022611',
 'MEDDRA:10028813',
 'MEDDRA:10035664',
 'MEDDRA:10038695',
 'MEDDRA:10060760',
 'MEDDRA:10061818',
 'MEDDRA:10063560',
 'MEDDRA:10013950',
 'MEDDRA:10037660']

In [124]:
def fetch_drug_indication(chembl_id):
    """
    Retrieve drug indications of a drug using BioThings Explorer.
    :param chembl_id: CHEMBL compound ID in curie format, e.g. CHEMBL.COMPOUND:CHEMBL1480
    :return:
    """
    indications = query_single_edge(_input='chembl.compound', 
                                    _output='umls.disease',
                                    _value=chembl_id.split(':')[-1])
    if indications and 'data' in indications:
        return [_doc['output']['object'] for _doc in indications['data']]
    else:
        return []

def fetch_drug_approval_status(chembl_id):
    """
    Retrieve drug approval status of a drug using BioThings Explorer.
    :param chembl_id: CHEMBL compound ID in curie format, e.g. CHEMBL.COMPOUND:CHEMBL1480
    :return:
    """
    drug_phase = query_single_edge(_input='chembl.compound', 
                                   _output='drugPhase',
                                   _value=chembl_id.split(':')[-1])
    if drug_phase and 'data' in drug_phase:
        return [_doc['output']['object'] for _doc in drug_phase['data']]
    else:
        return []

In [125]:
fetch_drug_side_effects('CHEMBL.COMPOUND:CHEMBL744')

[{'id': 'UMLS.DISEASE:C0002736',
  'label': 'Amyotrophic lateral sclerosis',
  'secondary-id': 'SNOMED:86044005'}]

In [128]:
def fetch_drug_approval_status(chembl_id):
    drug_phase = direct_input_2_output(_input='chembl.compound', 
                                        _output='drugPhase',
                                        _value=chembl_id.split(':')[-1])
    if drug_phase and 'data' in drug_phase:
        return [_doc['output']['object'] for _doc in drug_phase['data']]
    else:
        return []

In [133]:
fetch_drug_approval_status('CHEMBL.COMPOUND:CHEMBL744')

[{'id': 'DRUGPHASE:4'}]

In [131]:
def fetch_drug_common_side_effects(chembl_id, threshold=20):
    side_effects = direct_input_2_output(_input='chembl.compound', 
                                        _output='meddra',
                                        _value=chembl_id.split(':')[-1])
    if side_effects and 'data' in side_effects:
        return [_doc['output']['object'] for _doc in side_effects['data'] if float(_doc['output']['edge']['probability']) > threshold]
    else:
        return []

In [132]:
fetch_drug_common_side_effects('CHEMBL.COMPOUND:CHEMBL744', threshold=50)

[{'id': 'MEDDRA:10002026', 'label': 'Amyotrophic lateral sclerosis'},
 {'id': 'MEDDRA:10011906', 'label': 'Death'},
 {'id': 'MEDDRA:10061818', 'label': 'Disease progression'},
 {'id': 'MEDDRA:10037660', 'label': 'Pyrexia'}]

In [134]:
def fetch_drug_EPC(chembl_id):
    drug_EPC = direct_input_2_output(_input='chembl.compound', 
                                        _output='ndfrt',
                                        _value=chembl_id.split(':')[-1])
    if drug_EPC and 'data' in drug_EPC:
        return [_doc['output']['object'] for _doc in drug_EPC['data']]
    else:
        return []

In [135]:
fetch_drug_EPC('CHEMBL.COMPOUND:CHEMBL744')

[{'id': 'NDFRT:N0000175740', 'label': 'Benzothiazole'}]

In [136]:
chembl_compound_set_RTX = {'CHEMBL.COMPOUND:CHEMBL107',
 'CHEMBL.COMPOUND:CHEMBL1200936',
 'CHEMBL.COMPOUND:CHEMBL1201748',
 'CHEMBL.COMPOUND:CHEMBL1201752',
 'CHEMBL.COMPOUND:CHEMBL1201754',
 'CHEMBL.COMPOUND:CHEMBL15770',
 'CHEMBL.COMPOUND:CHEMBL159',
 'CHEMBL.COMPOUND:CHEMBL1683590',
 'CHEMBL.COMPOUND:CHEMBL170052',
 'CHEMBL.COMPOUND:CHEMBL1742994',
 'CHEMBL.COMPOUND:CHEMBL1743082',
 'CHEMBL.COMPOUND:CHEMBL1788401',
 'CHEMBL.COMPOUND:CHEMBL2028663',
 'CHEMBL.COMPOUND:CHEMBL2104461',
 'CHEMBL.COMPOUND:CHEMBL2110725',
 'CHEMBL.COMPOUND:CHEMBL3545110',
 'CHEMBL.COMPOUND:CHEMBL3770899',
 'CHEMBL.COMPOUND:CHEMBL409153',
 'CHEMBL.COMPOUND:CHEMBL428647',
 'CHEMBL.COMPOUND:CHEMBL438',
 'CHEMBL.COMPOUND:CHEMBL467',
 'CHEMBL.COMPOUND:CHEMBL523',
 'CHEMBL.COMPOUND:CHEMBL553025',
 'CHEMBL.COMPOUND:CHEMBL90555',
 'CHEMBL.COMPOUND:CHEMBL92'}

In [140]:
RTX_drugs_annotation = {}
for _compound in chembl_compound_set_RTX:
    print(_compound)
    RTX_drugs_annotation[_compound] = {'common_side_effects': fetch_drug_common_side_effects(_compound, threshold=50),
                                       'approval_status': fetch_drug_approval_status(_compound),
                                       'approved_indications': fetch_drug_indication(_compound),
                                       'established_pharm_class': fetch_drug_EPC(_compound)
                                       }

CHEMBL.COMPOUND:CHEMBL467
CHEMBL.COMPOUND:CHEMBL159
CHEMBL.COMPOUND:CHEMBL438
CHEMBL.COMPOUND:CHEMBL1201754
CHEMBL.COMPOUND:CHEMBL2110725
CHEMBL.COMPOUND:CHEMBL3545110
CHEMBL.COMPOUND:CHEMBL2028663
CHEMBL.COMPOUND:CHEMBL1742994
CHEMBL.COMPOUND:CHEMBL1200936
CHEMBL.COMPOUND:CHEMBL553025
CHEMBL.COMPOUND:CHEMBL170052
CHEMBL.COMPOUND:CHEMBL107
CHEMBL.COMPOUND:CHEMBL90555
CHEMBL.COMPOUND:CHEMBL409153
CHEMBL.COMPOUND:CHEMBL1201748
CHEMBL.COMPOUND:CHEMBL92
CHEMBL.COMPOUND:CHEMBL523
CHEMBL.COMPOUND:CHEMBL428647
CHEMBL.COMPOUND:CHEMBL1788401
CHEMBL.COMPOUND:CHEMBL15770
CHEMBL.COMPOUND:CHEMBL2104461
CHEMBL.COMPOUND:CHEMBL1201752
CHEMBL.COMPOUND:CHEMBL1683590
CHEMBL.COMPOUND:CHEMBL1743082
CHEMBL.COMPOUND:CHEMBL3770899
