# Example Query for sensitive patients

Takes a list of mutated genes and forms it into a mock patient profile. Queries our system in the form of:
P(survival_time >= 1000 | g1_mut = True, g2_mut = True, ..., gn_mut = True). 
The response is a knowledge graph containing the probability of survival time and a description containing the sensitive patients used in the inference.

In [1]:
import requests
import json
import csv

In [2]:
def buildQuery(genes):
    """Build the query message for P(Survival_Time >= 1000 | all genes mutated).

    The query graph is laid out as:

        gene nodes --part_of--> gene_grouping --expressed_in--> patient
                                               patient --has_phenotype--> survival

    Node ids are 'n0', 'n1', ... and edge ids are 'e0', 'e1', ... in the
    order they are created.

    Args:
        genes: iterable of gene entries, where entry[0] is the gene name and
            entry[1] is its identifier, e.g.
            ('RAF1', 'ENSEMBL:ENSG00000132155').

    Returns:
        dict with the keys 'query_graph' (the nodes and edges described
        above), 'knowledge_graph' and 'results' (both empty skeletons),
        and 'probability_targets' (the survival-time target).
    """
    query_nodes = []
    query_edges = []

    def add_node(attributes):
        # ids follow creation order: n0, n1, ...
        node_id = 'n{}'.format(len(query_nodes))
        node = {'id': node_id}
        node.update(attributes)
        query_nodes.append(node)
        return node_id

    def add_edge(edge_type, source_id, target_id, curie=None):
        # ids follow creation order: e0, e1, ...
        edge = {'id': 'e{}'.format(len(query_edges)), 'type': edge_type}
        if curie is not None:
            edge['curie'] = curie
        edge['source_id'] = source_id
        edge['target_id'] = target_id
        query_edges.append(edge)

    # evidence genes
    gene_ids = []
    for g in genes:
        gene_ids.append(add_node({'type': 'Gene',
                                  'name': '{}'.format(g[0]),
                                  'curie': '{}'.format(g[1])}))

    # grouping node that every gene is attached to
    group_id = add_node({'type': 'gene_grouping'})
    for gene_id in gene_ids:
        add_edge('part_of', gene_id, group_id, curie=['SEMMEDDB:PART_OF'])

    # patient node, linked from the gene group
    patient_id = add_node({'type': 'patient', 'curie': ['UMLSSC:T101']})
    add_edge('expressed_in', group_id, patient_id, curie=['RO:0002206'])

    # survival node, linked from the patient
    survival_id = add_node({'type': 'PhenotypicFeature',
                            'curie': 'CHPDART:SURVIVAL',
                            'operator': '>=',
                            'value': '1000'})
    add_edge('has_phenotype', patient_id, survival_id)

    # Only the sections that are actually populated are emitted; the empty
    # results skeleton lives under 'results'.
    return {
        'query_graph': {'edges': query_edges, 'nodes': query_nodes},
        'knowledge_graph': {'edges': [], 'nodes': []},
        'results': {'node_bindings': [], 'edge_bindings': []},
        # BKB target
        'probability_targets': [('Survival_Time', '>=', 1000)],
    }

In [3]:
def readGenes(path='gene_curie_map.csv'):
    """Read the gene table from a CSV file, skipping its first (header) row.

    Args:
        path: CSV file to read. Defaults to 'gene_curie_map.csv', resolved
            against the current working directory.

    Returns:
        list of rows after the header, each row a list of column strings;
        an empty list when the file has no rows after the header.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(path, 'r', newline='') as gene_file:
        reader = csv.reader(gene_file)
        # The None default keeps an empty file from raising StopIteration.
        next(reader, None)
        return list(reader)

In [6]:
# list of genes we can query over
#gene_list = readGenes()
#print(gene_list)

# or pull from available list above
genes = [('RAF1','ENSEMBL:ENSG00000132155'),('BRAF','ENSEMBL:ENSG00000157764')]

# build the query message and submit it to the service
response = buildQuery(genes)
response['reasoner_id'] = 'unsecret'
payload = {'query': response}
r = requests.post('http://chp.thayer.dartmouth.edu/submitQuery/', json=payload)
# Stop on an HTTP error status instead of trying to parse an error body as JSON.
r.raise_for_status()
chp_res = json.loads(r.content)

QG = chp_res['query_graph']
KG = chp_res['knowledge_graph']
res = chp_res['results']

# sensitive patients: find the knowledge-graph node that the first node
# binding of the results points at
KG_result_node = res['node_bindings'][0]['kg_id']
result_node = None
for node in KG['nodes']:
    if node['id'] == KG_result_node:
        result_node = node

# Without this check a missing node would only surface later as a NameError.
if result_node is None:
    raise LookupError(
        "knowledge graph has no node with id {!r}".format(KG_result_node))

sensitive_patients = result_node['Description']
p_survival = result_node['has_confidence_level']

# probability of survival given QG specification
print("Probability of survival > 1000 days is:", p_survival)

Probability of survival > 1000 days is: 0.3173173173173173


Our dictionary of sensitive patients is broken into two categories given our target question - P(survival_time >= 1000). Below we gather the patients that were sensitive to the true assignment of P(survival_time >= 1000) and those that were sensitive to the false assignment. We also demonstrate how you can extract patient-level data.

In [7]:
# Split the sensitive patients by which assignment of the survival target
# they were sensitive to.
by_assignment = sensitive_patients['A = l']
true_sensitive_patients = by_assignment['Survival_Time >= 1000 = True']
false_sensitive_patients = by_assignment['Survival_Time >= 1000 = False']

# the dictionary is keyed by patient ID
print(true_sensitive_patients.keys())

# look at one patient: first the fields recorded for them ...
example_patient = true_sensitive_patients['TCGA-GM-A2DO']
print(example_patient.keys())
# ... then the drugs that patient was given
print(example_patient['Drug_Name(s)'])

dict_keys(['TCGA-5L-AAT1', 'TCGA-GM-A2DO', 'TCGA-AO-A0JC', 'TCGA-OL-A5RW', 'TCGA-E2-A150'])
dict_keys(['Patient_ID', 'Cancer_Type', 'Patient_Genes', 'Patient_Gene_Variants', 'Patient_Variants', 'Patient_Gene_Reads', 'Age_of_Diagnosis', 'Gender', 'PathT', 'PathN', 'PathM', 'Survival_Time', 'Drug_Name(s)', 'Biological_Object(s)', 'Process_Activity(s)', 'Process_Type(s)', 'Patient_Gene_Variant'])
['TAMOXIFEN']
