# Example Query for Survival Probability of 1-hop Queries

This notebook queries our system with questions of the form:<br>
$P(survival\_time > 970 | Mut\_g_1 = True, Mut\_g_2 = True, ..., Mut\_g_n = True, Drug = d_1, Drug = d_2, ..., Drug = d_n)$<br>
The response is a knowledge graph containing the probability that survival time exceeds the given threshold.

In [21]:
import requests
import json
import csv

In [22]:
def buildQuery(st, genes=None, drugs=None):
    """Build a 1-hop query graph asking for a survival-time probability.

    The graph encodes the probabilistic question

        P(survival_time > st | Mut_g1 = True, ..., Mut_gn = True,
                               Drug = d1, ..., Drug = dm)

    Layout of the result:
      * one 'Gene' node per entry of ``genes`` and one 'Drug' node per entry
        of ``drugs``, numbered n0, n1, ... in that order;
      * a single 'disease' node for Breast_Cancer (MONDO:0007254) that every
        gene and drug node is linked to;
      * a 'PhenotypicFeature' node for Survival_Time (EFO:0000714), linked
        from the disease node by an edge carrying ``st`` as its 'value'.

    Args:
        st: survival-time threshold; stored unchanged on the final edge.
        genes: iterable of (name, curie) pairs. Only element [1] (the curie)
            is used. Drugs can be left empty, but genes should not be: this
            function does not enforce that, it is a requirement of the query
            being asked.
        drugs: iterable of (name, curie) pairs, handled like ``genes``.

    Returns:
        dict of the form {'query_graph': {'edges': [...], 'nodes': [...]}}.
    """
    # None instead of a mutable [] default, so no list object is shared
    # between calls.
    genes = [] if genes is None else genes
    drugs = [] if drugs is None else drugs

    nodes = []
    edges = []

    def add_node(node_type, curie):
        # Node ids are positional: 'n<index in nodes>'.
        node_id = 'n{}'.format(len(nodes))
        nodes.append({'id': node_id,
                      'type': node_type,
                      'curie': '{}'.format(curie)})
        return node_id

    # Evidence nodes: genes first, then drugs. Remember which association
    # type each one needs when it is linked to the disease below.
    evidence_links = []
    for gene in genes:
        evidence_links.append((add_node('Gene', gene[1]),
                               'gene_to_disease_association'))
    for drug in drugs:
        evidence_links.append(
            (add_node('Drug', drug[1]),
             'chemical_to_disease_or_phenotypic_feature_association'))

    # Disease node; keeping its id avoids recomputing it from counters.
    disease_id = add_node('disease', 'MONDO:0007254')  # Breast_Cancer

    # Link all evidence to the disease.
    for source_id, edge_type in evidence_links:
        edges.append({'id': 'e{}'.format(len(edges)),
                      'type': edge_type,
                      'source_id': source_id,
                      'target_id': disease_id})

    # Target survival node and the edge that carries the threshold.
    survival_id = add_node('PhenotypicFeature', 'EFO:0000714')  # Survival_Time
    edges.append({'id': 'e{}'.format(len(edges)),
                  'type': 'disease_to_phenotype_association',
                  'value': st,
                  'source_id': disease_id,
                  'target_id': survival_id})

    return {'query_graph': {'edges': edges, 'nodes': nodes}}

In [23]:
def readGenes(path='gene_curie_map.csv'):
    """Read the gene/curie CSV map and return its data rows.

    Args:
        path: CSV file to read. Defaults to 'gene_curie_map.csv' in the
            current working directory.

    Returns:
        A list of rows, each a list of column strings, with the first
        (header) row dropped. An empty file yields an empty list.
    """
    # newline='' lets the csv module handle line endings itself, which it
    # needs in order to parse quoted fields containing newlines correctly.
    with open(path, 'r', newline='') as gene_file:
        reader = csv.reader(gene_file)
        # Skip the header; the None default avoids StopIteration when the
        # file is empty.
        next(reader, None)
        return list(reader)

In [24]:
def readDrugs(path='drug_curie_map.csv'):
    """Read the drug/curie CSV map and return its data rows.

    Args:
        path: CSV file to read. Defaults to 'drug_curie_map.csv' in the
            current working directory.

    Returns:
        A list of rows, each a list of column strings, with the first
        (header) row dropped. An empty file yields an empty list.
    """
    # newline='' lets the csv module handle line endings itself, which it
    # needs in order to parse quoted fields containing newlines correctly.
    with open(path, 'r', newline='') as drug_file:
        reader = csv.reader(drug_file)
        # Skip the header; the None default avoids StopIteration when the
        # file is empty.
        next(reader, None)
        return list(reader)

In [26]:
# Build a survival query, submit it to the CHP endpoint, and pull the
# survival probability and its contribution report out of the response.

# list of genes (and curies) we can query over
#gene_list = readGenes()

# list of drugs (and curies) we can query over
#drug_list = readDrugs()

# or pull from above lists
genes = [('RAF1', 'ENSEMBL:ENSG00000132155'),
         ('MAP3K13','ENSEMBL:ENSG00000073803')]
drugs = []
survival_time = 970
# drugs can be left out of the call when empty; the query is expected to
# include at least one gene
query = buildQuery(survival_time, genes=genes, drugs=drugs)
query['reasoner_id'] = 'unsecret'
payload = {'query': query}

# Bound the wait so an unresponsive server cannot hang the notebook, and
# fail on an HTTP error status instead of trying to parse an error page.
r = requests.post('http://chp.thayer.dartmouth.edu/submitQuery/',
                  json=payload,
                  timeout=300)
r.raise_for_status()
chp_res = r.json()

QG = chp_res['query_graph']
KG = chp_res['knowledge_graph']
res = chp_res['results']

# extract probability: it is carried on the disease -> phenotype edge
survival_edges = [edge for edge in KG['edges']
                  if edge['type'] == 'disease_to_phenotype_association']
if not survival_edges:
    # Without this check the print below would fail with an unhelpful
    # NameError on p_survival.
    raise ValueError("response knowledge graph has no "
                     "'disease_to_phenotype_association' edge")
# If several edges match, the last one wins.
p_survival = survival_edges[-1]['has_confidence_level']
Contribution = survival_edges[-1]['Description']

# probability of survival given QG specification
print("Probability of survival > {} days is:".format(survival_time), p_survival)

Probability of survival > 970 days is: 0.006155376747353221


We can access the feature contributions to our query by navigating the Contribution report. Under each truth assignment, contributions are ranked from most to least contributing.

In [20]:
# Print the contribution report, one section per truth assignment of the
# survival query: first the features supporting the True assignment, then
# those supporting the False assignment.
contribution_analysis = Contribution['Contribution Analysis']

# (heading to print, key of that section in the contribution analysis)
report_sections = (
    ("Evidence supporting P(Survival_Time >= 970 | Evidence) == True:",
     'Survival_Time >= 970 = True'),
    ("Evidence supporting P(Survival_Time >= 970 | Evidence) == False:",
     'Survival_Time >= 970 = False'),
)

for heading, section_key in report_sections:
    print(heading)
    # One tab-indented "feature : contribution" line per entry.
    for feature, weight in contribution_analysis[section_key].items():
        print('\t{} : {}'.format(feature, weight))

Evidence supporting P(Survival_Time >= 970 | Evidence) == True:
	Age_of_Diagnosis 21113.81249257478 - 29560.278692835553 : 2.581601925858197e-05
	Stage_T == T1 : 2.4880703020252804e-05
	Stage_N == N0 : 2.3945386781923637e-05
	Stage_M == M0 : 2.301007054359447e-05
	Drug_Name(s) == CYTOXAN = True : 2.2074754305265302e-05
	Drug_Name(s) == TAMOXIFEN = True : 2.1139438066936135e-05
	Drug_Name(s) == DOXORUBICIN = True : 2.020412182860697e-05
	mut_RAF1 = True : 1.5527540636961135e-05
	mut_MAP3K13 = True : 1.4692712857683897e-05
	mut_PIK3CA = True : 1.7701440176064054e-06
	mut_MAP3K13 = False : 1.7701440176064054e-06
	mut_TTN = True : 9.353162383291668e-07
	mut_RAF1 = False : 9.353162383291668e-07
Evidence supporting P(Survival_Time >= 970 | Evidence) == False:
	Age_of_Diagnosis 17217.23703040372 - 26168.902787225456 : 3.212108479294976e-05
	Stage_T == T4 : 3.1111482330903835e-05
	Stage_N == N3 : 3.0101879868857913e-05
	Stage_M == M1 : 2.909227740681199e-05
	Drug_Name(s) == CYCLOPHOSPHAMIDE = 