# Example Query for sensitive patients

Takes a list of mutated genes and forms it into a mock patient profile. Queries our system in the form of:
P(survival_time >= 1000 | g1_mut = True, g2_mut = True, ..., gn_mut = True).
Returned is a knowledge graph containing the probability of survival time and a description containing the sensitive patients used in the inference.

In [1]:
import requests
import json

In [2]:
def buildQuery(genes):
    """Build the query message for P(Survival_Time >= 1000 | genes).

    Input:
    -----------
    genes: iterable of indexable pairs. Item 0 of each pair becomes the
           gene node's 'name' and item 1 its 'curie' (the example call
           passes (gene name, ENSEMBL identifier) tuples).

    Output
    -----------
    A dict with:
      * 'query_graph': one 'Gene' node per input pair, followed by a
        'gene_grouping' node, a 'patient' node and a survival
        'PhenotypicFeature' node; each gene is linked to the grouping by
        a 'part_of' edge, the grouping to the patient by 'expressed_in',
        and the patient to survival by 'has_phenotype'.
      * 'knowledge_graph' and 'results': empty skeletons.
      * 'response': an empty dict.
      * 'probability_targets': [('Survival_Time', '>=', 1000)].

    Node ids are 'n0', 'n1', ... and edge ids 'e0', 'e1', ... in the
    order the elements are listed above.
    """
    # evidence genes come first, so gene i is node 'n<i>'
    gene_nodes = [
        {
            'id': 'n{}'.format(index),
            'type': 'Gene',
            'name': '{}'.format(gene[0]),
            'curie': '{}'.format(gene[1]),
        }
        for index, gene in enumerate(genes)
    ]
    gene_total = len(gene_nodes)

    # the three structural nodes take the next three ids after the genes
    group_id = 'n{}'.format(gene_total)
    patient_id = 'n{}'.format(gene_total + 1)
    survival_id = 'n{}'.format(gene_total + 2)

    group_node = {'id': group_id, 'type': 'gene_grouping'}
    patient_node = {
        'id': patient_id,
        'type': 'patient',
        'curie': ['UMLSSC:T101'],
    }
    survival_node = {
        'id': survival_id,
        'type': 'PhenotypicFeature',
        'curie': 'CHPDART:SURVIVAL',
        'operator': '>=',
        'value': '1000',
    }

    # one part_of edge per gene, numbered in the same order as the genes
    membership_edges = [
        {
            'id': 'e{}'.format(index),
            'type': 'part_of',
            'curie': ['SEMMEDDB:PART_OF'],
            'source_id': gene_node['id'],
            'target_id': group_id,
        }
        for index, gene_node in enumerate(gene_nodes)
    ]
    group_to_patient = {
        'id': 'e{}'.format(gene_total),
        'type': 'expressed_in',
        'curie': ['RO:0002206'],
        'source_id': group_id,
        'target_id': patient_id,
    }
    patient_to_survival = {
        'id': 'e{}'.format(gene_total + 1),
        'type': 'has_phenotype',
        'source_id': patient_id,
        'target_id': survival_id,
    }

    return {
        'query_graph': {
            'edges': membership_edges + [group_to_patient, patient_to_survival],
            'nodes': gene_nodes + [group_node, patient_node, survival_node],
        },
        'knowledge_graph': {'edges': [], 'nodes': []},
        # NOTE(review): this empty 'response' entry is never filled in; it
        # may have been meant to be 'results' -- confirm before removing it.
        'response': dict(),
        'results': {'node_bindings': [], 'edge_bindings': []},
        # BKB target
        'probability_targets': [('Survival_Time', '>=', 1000)],
    }

In [3]:
def readGenes():
    """Read 'genes.txt' from the working directory and split it on commas.

    Returns the list of comma-separated fields exactly as they appear in
    the file; no whitespace is stripped, so a trailing newline stays
    attached to the last entry.
    """
    # the context manager closes the handle even if read() raises
    with open('genes.txt', 'r') as f:
        gene_list = f.read()
    return gene_list.split(',')

In [9]:
# list of genes we can query over
#gene_list = readGenes()
#print(gene_list)

# evidence genes as (gene name, ENSEMBL curie) pairs
genes = [('RAF1','[ENSEMBL:ENSG00000132155.12]'),('BRAF','[ENSEMBL:ENSG00000157764.13]')]

response = buildQuery(genes)
response['reasoner_id'] = 'unsecret'
payload = {'query': response}
r = requests.post('http://chp.thayer.dartmouth.edu/submitQuery/', json=payload)
# fail on an HTTP error status instead of trying to parse an error page as JSON
r.raise_for_status()
chp_res = json.loads(r.content)

QG = chp_res['query_graph']
KG = chp_res['knowledge_graph']
res = chp_res['results']

# sensitive patients
# the first node binding names the knowledge-graph node whose
# 'Description' and 'has_confidence_level' fields are read below
KG_result_node = res['node_bindings'][0]['kg_id']
result_node_found = False
for node in KG['nodes']:
    if node['id'] == KG_result_node:
        sensitive_patients = node['Description']
        p_survival = node['has_confidence_level']
        result_node_found = True

# without this check a missing node surfaces as a NameError at the prints below
if not result_node_found:
    raise LookupError(
        'knowledge graph has no node with id {!r}'.format(KG_result_node)
    )

# probability of survival given QG specification
print(p_survival)

# Text dump of sensitive patients.
# Each patient contains patient data accessed through the following keys
# 'Patient_ID'
# 'Cancer_Type'
# 'Patient_Genes'
# 'Patient_Gene_Variants'
# 'Patient_Variants'
# 'Age_of_Diagnosis'
# 'Survival_Time'
# 'Gender'
# 'Drug_Name(s)'
# 'Biological_Object(s)'
# 'Process_Activity(s)'
# 'Process_Types(s)'
# 'PathM'
# 'PathN'
# 'PathT'
print(sensitive_patients)


0.3173173173173173
{'A = l': {'Survival_Time >= 1000 = True': {'TCGA-5L-AAT1': {'Patient_ID': 'TCGA-5L-AAT1', 'Cancer_Type': 'TCGA-BRCA', 'Patient_Genes': ['AARS', 'ABCA1', 'ABCA13', 'ABCA3', 'ABCA8', 'ABCB4', 'ABCB6', 'ABCC6', 'ABCD2', 'ABCF1', 'ABCG2', 'ABHD4', 'AC233702.1', 'ACACA', 'ACACB', 'ACKR3', 'ACO2', 'ACTBL2', 'ADAM32', 'ADAM9', 'ADAMTS1', 'ADAMTS13', 'ADAMTS16', 'ADAMTS17', 'ADAMTS3', 'ADAR', 'ADCK2', 'ADCY2', 'ADCY8', 'ADGB', 'ADGRB3', 'ADGRE2', 'ADGRL1', 'ADGRV1', 'ADPRH', 'ADRA1A', 'ADRA2A', 'ADRB2', 'AEBP1', 'AFF1', 'AGBL3', 'AGPAT4', 'AGPS', 'AGRN', 'AHNAK', 'AIM1', 'AK7', 'AK8', 'AKIP1', 'AKR1B10', 'AKR1C1', 'AKR1C2', 'AKR1E2', 'AKT3', 'AL354820.1', 'ALDH7A1', 'ALOX15', 'ALPK2', 'ALX4', 'AMD1', 'AMER1', 'AMMECR1L', 'ANAPC1', 'ANAPC4', 'ANGEL2', 'ANGPTL3', 'ANK1', 'ANKFY1', 'ANKRD12', 'ANKRD30A', 'ANKRD33', 'ANKRD36B', 'ANKRD36C', 'AP1M2', 'AP2A2', 'AP5S1', 'APBA1', 'APC', 'APOBR', 'APOC2', 'APPBP2', 'APPL1', 'ARAF', 'ARHGAP24', 'ARHGAP31', 'ARHGAP39', 'ARHGEF11', 'ARH