# Example Query for Survival Probability of 1-hop Queries

Queries our system in the form of:<br>
$P(survival\_time > X | Mut\_g_1 = True, Mut\_g_2 = True, ..., Mut\_g_n = True, Drug = d_1, Drug = d_2, ..., Drug = d_n)$<br>
The response is a knowledge graph that contains the probability that survival time exceeds $X$.

In [1]:
import requests
import json
import csv

In [2]:
# Function: buildQuery
#
# Input:
# -----------
# st: survival-time threshold X; genes, drugs: lists of (name, curie) tuples
#
# Output:
# -----------
# A query graph that answers this probabilistic question:
# P(survival_time > X | Mut_g1 = True, Mut_g2 = True, ..., Mut_gn = True, Drug = d1, Drug = d2, ..., Drug = dn)
# Drugs can be left empty, but Genes CANNOT

def buildQuery(st, genes=[], drugs=[]):
    """Assemble the query graph for a survival-probability question.

    The graph encodes
    P(survival_time > st | Mut_g1 = True, ..., Mut_gn = True, Drug = d1, ..., Drug = dn).

    Parameters
    ----------
    st
        Survival-time threshold; stored as the 'value' of the
        disease-to-phenotype edge.
    genes : list of tuple
        Gene evidence; element [1] of each tuple becomes the node curie.
    drugs : list of tuple
        Drug evidence; element [1] of each tuple becomes the node curie.

    Returns
    -------
    dict
        ``{'query_graph': {'edges': [...], 'nodes': [...]}}``. Nodes are
        numbered n0, n1, ... in the order genes, drugs, disease, survival
        phenotype; edges are numbered e0, e1, ... in the order they are added.
    """
    nodes = []
    edges = []

    def _add_node(node_type, curie):
        # Ids are sequential, so the next id is simply the current node count.
        node_id = 'n{}'.format(len(nodes))
        nodes.append({'id': node_id,
                      'type': node_type,
                      'curie': '{}'.format(curie)})
        return node_id

    # Evidence nodes first (genes, then drugs), followed by the disease node.
    gene_ids = [_add_node('Gene', gene[1]) for gene in genes]
    drug_ids = [_add_node('Drug', drug[1]) for drug in drugs]
    disease_id = _add_node('disease', 'MONDO:0007254')  # Breast_Cancer

    # Every piece of evidence points at the single disease node.
    evidence = [(gene_id, 'gene_to_disease_association') for gene_id in gene_ids]
    evidence += [(drug_id, 'chemical_to_disease_or_phenotypic_feature_association')
                 for drug_id in drug_ids]
    for source_id, edge_type in evidence:
        edges.append({'id': 'e{}'.format(len(edges)),
                      'type': edge_type,
                      'source_id': source_id,
                      'target_id': disease_id})

    # Target node: the survival-time phenotype being queried.
    survival_id = _add_node('PhenotypicFeature', 'EFO:0000714')  # Survival_Time

    # The disease -> survival edge carries the threshold value.
    edges.append({'id': 'e{}'.format(len(edges)),
                  'type': 'disease_to_phenotype_association',
                  'value': st,
                  'source_id': disease_id,
                  'target_id': survival_id})

    return {'query_graph': {'edges': edges, 'nodes': nodes}}

In [3]:
def readGenes(path='gene_curie_map.csv'):
    """Read the gene/curie map and return its rows as 2-tuples.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to 'gene_curie_map.csv' in the current
        working directory. The first row is treated as a header and skipped.

    Returns
    -------
    list of tuple
        ``(row[0], row[1])`` for every non-blank data row, in file order
        (presumably gene name and curie -- confirm against the CSV).
        An empty file yields an empty list.

    Raises
    ------
    IndexError
        If a non-blank data row has fewer than two columns.
    """
    # newline='' is what the csv module expects so it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(path, 'r', newline='') as gene_file:
        reader = csv.reader(gene_file)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        # csv.reader yields [] for blank lines (e.g. a trailing empty line).
        return [(row[0], row[1]) for row in reader if row]

In [4]:
def readDrugs(path='drug_curie_map.csv'):
    """Read the drug/curie map and return its rows as 2-tuples.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to 'drug_curie_map.csv' in the current
        working directory. The first row is treated as a header and skipped.

    Returns
    -------
    list of tuple
        ``(row[0], row[1])`` for every non-blank data row, in file order
        (presumably drug name and curie -- confirm against the CSV).
        An empty file yields an empty list.

    Raises
    ------
    IndexError
        If a non-blank data row has fewer than two columns.
    """
    # newline='' is what the csv module expects so it can handle line
    # endings (including newlines inside quoted fields) itself.
    with open(path, 'r', newline='') as drug_file:
        reader = csv.reader(drug_file)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        # csv.reader yields [] for blank lines (e.g. a trailing empty line).
        return [(row[0], row[1]) for row in reader if row]

In [5]:
# list of genes (and curies) we can query over
gene_list = readGenes()

# list of drugs (and curies) we can query over
drug_list = readDrugs()

# or pull from above lists
genes = [('RAF1', 'ENSEMBL:ENSG00000132155'),
         ('MAP3K13','ENSEMBL:ENSG00000073803')]
drugs = []
survival_time = 1000
# if genes or drugs is an empty list you can omit them
query = buildQuery(survival_time, genes=genes, drugs=drugs)
query['reasoner_id'] = 'unsecret'
payload = {'message': query}

# Without a timeout requests waits indefinitely if the service never answers.
r = requests.post('http://chp.thayer.dartmouth.edu/query/', json=payload, timeout=120)
# Fail here on an HTTP error instead of on a confusing KeyError further down.
r.raise_for_status()
chp_res = r.json()

QG = chp_res['query_graph']
KG = chp_res['knowledge_graph']
res = chp_res['results']

# extract probability from the disease -> survival edge of the knowledge graph
survival_edges = [edge for edge in KG['edges']
                  if edge['type'] == 'disease_to_phenotype_association']
if not survival_edges:
    # Otherwise p_survival / Contribution would be undefined below.
    raise ValueError("Response knowledge graph has no "
                     "'disease_to_phenotype_association' edge")
# If several edges match, the last one wins.
p_survival = survival_edges[-1]['has_confidence_level']
Contribution = survival_edges[-1]['Description']

# probability of survival given QG specification
print("Probability of survival > {} days is:".format(survival_time), p_survival)

Probability of survival > 1000 days is: 0.4224032687518743


We can access the feature contributions to our query by navigating the Contribution report. Under each truth assignment, contributions are ranked from most to least contributing.

In [6]:
# The report is keyed by truth assignment of the survival target.
contribution_analysis = Contribution['Contribution Analysis']

# Show the contributing features for the True assignment first, then for the
# False assignment.
for truth in (True, False):
    print("Evidence supporting P(Survival_Time >= {} | Evidence) == {}:".format(survival_time, truth))
    ranked = contribution_analysis['Survival_Time >= {} = {}'.format(survival_time, truth)]
    for feature, weight in ranked.items():
        print('\t{} : {}'.format(feature, weight))
    

Evidence supporting P(Survival_Time >= 1000 | Evidence) == True:
	Age_of_Diagnosis 16134.063037672804 - 24576.605732043285 : 2.507397430072786e-05
	Stage_T == T1 : 2.4195048023416998e-05
	Stage_N == N0 : 2.3316121746106135e-05
	Stage_M == M0 : 2.2437195468795273e-05
	Drug_Name(s) == CYTOXAN = True : 2.155826919148441e-05
	Drug_Name(s) == DOXORUBICIN = True : 2.0679342914173548e-05
	Drug_Name(s) == TAMOXIFEN = True : 1.9800416636862685e-05
	mut_RAF1 = True : 1.5405785250308376e-05
	mut_MAP3K13 = True : 1.4648585598728777e-05
	mut_PIK3CA = True : 1.6361259288904592e-06
	mut_MAP3K13 = False : 1.6361259288904592e-06
	mut_TTN = True : 8.789262773108619e-07
	mut_RAF1 = False : 8.789262773108619e-07
Evidence supporting P(Survival_Time >= 1000 | Evidence) == False:
	Age_of_Diagnosis 17147.550755762844 - 26109.14719145123 : 3.29912588254105e-05
	Stage_T == T4 : 3.191891404128864e-05
	Stage_N == N3 : 3.084656925716678e-05
	Stage_M == M1 : 2.977422447304492e-05
	Drug_Name(s) == CYCLOPHOSPHAMIDE =