In [1]:
import networkx as nx

from biothings_explorer.hint import Hint
from biothings_explorer.user_query_dispatcher import FindConnection

In [2]:
# Hint turns a free-text term into candidate records, grouped by semantic type.
ht = Hint()
# Find all potential representations of CML (chronic myelogenous leukemia).
cml_hint = ht.query("chronic myelogenous leukemia")
# Select the first 'Disease' candidate as the representation of CML;
# later cells use `cml` as the query input.
cml = cml_hint['Disease'][0]
# Display the selected record so the choice can be checked by eye.
cml

{'MONDO': 'MONDO:0011996',
 'DOID': 'DOID:8552',
 'UMLS': 'C0023473',
 'name': 'chronic myelogenous leukemia, BCR-ABL1 positive',
 'OMIM': '608232',
 'ORPHANET': '521',
 'primary': {'identifier': 'MONDO',
  'cls': 'Disease',
  'value': 'MONDO:0011996'},
 'display': 'MONDO(MONDO:0011996) DOID(DOID:8552) OMIM(608232) ORPHANET(521) UMLS(C0023473) name(chronic myelogenous leukemia, BCR-ABL1 positive)',
 'type': 'Disease'}

In [3]:
# Find all potential representations of imatinib.
imatinib_hint = ht.query("imatinib")
# Select the first 'ChemicalSubstance' candidate as the representation of
# imatinib; later cells use `imatinib` as the query output.
imatinib = imatinib_hint['ChemicalSubstance'][0]
# Display the selected record so the choice can be checked by eye.
imatinib

{'DRUGBANK': 'DB00619',
 'CHEBI': 'CHEBI:45783',
 'name': 'imatinib',
 'primary': {'identifier': 'CHEBI',
  'cls': 'ChemicalSubstance',
  'value': 'CHEBI:45783'},
 'display': 'CHEBI(CHEBI:45783) DRUGBANK(DB00619) name(imatinib)',
 'type': 'ChemicalSubstance'}

In [4]:
# Set up a query from the CML disease record to the imatinib record,
# restricting the single intermediate hop to nodes of type 'Gene'.
fc = FindConnection(
    input_obj=cml,
    output_obj=imatinib,
    intermediate_nodes='Gene',
)

In [5]:
# Run the query. Setting verbose=True makes BTE print every step it takes
# to find the connection (path planning, API calls, and result summary).
fc.connect(verbose=True)


BTE will find paths that join 'chronic myelogenous leukemia, BCR-ABL1 positive' and 'imatinib'. Paths will have 1 intermediate node.

Intermediate node #1 will have these type constraints: Gene



==== Step #1: Query path planning ====

Because chronic myelogenous leukemia, BCR-ABL1 positive is of type 'Disease', BTE will query our meta-KG for APIs that can take 'Disease' as input and 'Gene' as output

BTE found 10 apis:

API 1. DISEASES(1 API call)
API 2. mydisease(1 API call)
API 3. hetio(1 API call)
API 4. biolink(1 API call)
API 5. scibite(1 API call)
API 6. mgi_gene2phenotype(1 API call)
API 7. semmed_disease(15 API calls)
API 8. scigraph(1 API call)
API 9. pharos(1 API call)
API 10. cord_disease(1 API call)


==== Step #2: Query path execution ====
NOTE: API requests are dispatched in parallel, so the list of APIs below is ordered by query time.

API 2.1: https://mydisease.info/v1/query?fields=disgenet.genes_related_to_disease.gene_id (POST -d q=C0023473,C0023474&scopes=mondo.xr


After id-to-object translation, BTE retrieved 730 unique objects.



BTE found 251 unique intermediate nodes connecting 'chronic myelogenous leukemia, BCR-ABL1 positive' and 'imatinib'


## Retrieve the networkx graph from the query

In [6]:
# The query result graph is exposed as `sub_G` on the inner `fc.fc` object;
# keep a short alias because every following cell reads from it.
g = fc.fc.sub_G

## Retrieve all nodes

In [7]:
# List the names of all nodes in the result graph (the output is long).
g.nodes()

NodeView(('GLI3', 'CDKN2B', 'C1708843', 'BCL2', 'C1335439', 'AHI1', 'PTEN', 'C3272447', 'FIP1L1', 'CDKN1A', 'FYN', 'C3814146', 'TNFAIP1', '7322', 'C1708630', 'MAPK1', 'C1704873', 'C0164786', 'C0031686', 'C1565434', 'C1705748', 'C1705280', 'EPHB4', 'TIMP1', 'CD82', 'FAP', 'CD4', 'G6PD', 'WDTC1', 'PDLIM5', 'C0752312', 'H19', 'TBXAS1', 'FGR', 'INS', 'MIR21', 'C1707145', 'NDUFB4', 'PTPN6', 'C1705846', 'PTS', 'SQSTM1', 'AMN', 'C1709425', 'imatinib', 'BLK', 'KIT', 'CREBBP', 'SH2B2', 'ARHGAP32', 'HSPA4', 'SEPHS1', 'C1439337', 'PCNA', 'C1705697', 'ABL1', 'MSC', 'C1705767', 'C0017337', 'EPB42', 'C0387583', 'PPP2R5C', 'C1707163', 'KITLG', 'FLT3', 'NF1', 'TERT', 'SMS', 'IL2', 'TCEAL1', 'WT1', 'C1307407', 'TMED7', 'C1706040', 'ETV6', 'C0014442', 'C0087071', 'NPB', 'BCR', 'SKI', 'CXCR4', 'C1334043', 'IL3', 'PRH1', 'CA2', 'ERBB2', 'LYN', 'ATP8A2', 'GPT', 'PIK3CB', 'TGFB1', 'TP53', 'MPO', 'RHOA', 'DOK1', 'CD44', 'C1705326', 'C0079073', 'C0044602', 'SERPINE2', 'DLL1', 'SLC22A1', 'SMAD3', 'MICE', 'SHC1

## Retrieve the start and end nodes

In [8]:
# Keep only the nodes whose 'level' attribute is 1, i.e. the two endpoints
# of the query (the input disease and the output drug).
[name for name, attrs in g.nodes(data=True) if attrs['level'] == 1]

['imatinib', 'chronic myelogenous leukemia, BCR-ABL1 positive']

In [9]:
# Inspect the attributes stored on the disease (start) node.
g.nodes["chronic myelogenous leukemia, BCR-ABL1 positive"]

{'type': 'Disease',
 'identifier': 'name',
 'level': 1,
 'equivalent_ids': defaultdict(set,
             {'MONDO': ['MONDO:0011996'],
              'DOID': ['DOID:8552'],
              'UMLS': ['C0023473', 'C0023474'],
              'name': ['CHRONIC MYELOGENOUS LEUKEMIA, BCR-ABL1 POSITIVE',
               'LEUKEMIA, MYELOID, CHRONIC-PHASE'],
              'OMIM': ['608232'],
              'ORPHANET': ['521']})}

In [10]:
# Inspect the attributes of one intermediate node, using the gene FLT3 as an example.
g.nodes["FLT3"]

{'identifier': 'SYMBOL',
 'type': 'Gene',
 'level': 2,
 'equivalent_ids': defaultdict(set,
             {'NCBIGene': ['2322'],
              'name': ['FMS RELATED RECEPTOR TYROSINE KINASE 3'],
              'SYMBOL': ['FLT3'],
              'UMLS': ['C1333568'],
              'HGNC': ['3765'],
              'UNIPROTKB': ['P36888'],
              'ENSEMBL': ['ENSG00000122025']})}

In [11]:
# Inspect the attributes stored on the drug (end) node.
g.nodes["imatinib"]

{'type': 'ChemicalSubstance',
 'identifier': 'name',
 'level': 1,
 'equivalent_ids': defaultdict(set,
             {'DRUGBANK': ['DB00619'],
              'CHEBI': ['CHEBI:45783'],
              'name': ['IMATINIB'],
              'CHEMBL.COMPOUND': ['CHEMBL941', 'CHEMBL1642'],
              'PUBCHEM': ['5291'],
              'UMLS': ['C0935989'],
              'MESH': ['C097613', 'D000068877'],
              'UNII': ['BKJ8M8G5HI', '8A1O1M485B']})}

## Retrieve all simple paths between the start and end node

In [12]:
# `nx` (networkx) is imported in the first cell with the other dependencies,
# so the notebook's requirements are visible up front.
#
# Print every simple path (no repeated node) from the disease node to the
# drug node. Each path is a list of node names: [disease, intermediate, drug].
# NOTE(review): the same node sequence can be printed several times --
# presumably once per parallel edge if `g` is a multigraph. Confirm before
# counting lines as distinct paths.
for path in nx.all_simple_paths(
    g,
    source="chronic myelogenous leukemia, BCR-ABL1 positive",
    target="imatinib",
):
    print(path)

['chronic myelogenous leukemia, BCR-ABL1 positive', 'GLI3', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'CDKN2B', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogenous leukemia, BCR-ABL1 positive', 'C1708843', 'imatinib']
['chronic myelogen