# /query and /predicate functionality for CHP

Queries for our system are in the probabilistic form of:<br>
$P(survival\_time > X | Mut\_g_1 = True, Mut\_g_2 = True, ..., Mut\_g_n = True, Drug = d_1, Drug = d_2, ..., Drug = d_n)$<br>
The response is a knowledge graph containing the probability of survival and, if requested, a contribution analysis.

In [17]:
import requests
import json
import csv

# /predicate functionality example
By running /predicates you can extract a json object with the following predicates:<br>
1.) gene_to_disease_association<br>
2.) chemical_to_disease_or_phenotypic_feature_association<br>
3.) disease_to_phenotypic_association<br>

The above predicates match the following biolink entities:<br>
1.) gene<br>
2.) drug<br>
3.) disease<br>
4.) phenotypicfeature

In [18]:
# Fetch the predicates supported by CHP and pretty-print the JSON response.
# The timeout keeps the cell from hanging forever if the server is
# unreachable, and raise_for_status() surfaces HTTP errors directly instead
# of letting them show up later as a JSON decoding failure.
r = requests.get('http://chp.thayer.dartmouth.edu/predicates/', timeout=30)
r.raise_for_status()
json_formatted_str = json.dumps(r.json(), indent=2)
print(json_formatted_str)

{
  "gene": {
    "disease": [
      "gene_to_disease_association"
    ]
  },
  "drug": {
    "disease": [
      "chemical_to_disease_or_phenotypic_feature_association"
    ]
  },
  "disease": {
    "phenotypicfeature": [
      "disease_to_phenotypic_association"
    ]
  }
}


# Build Query
`buildQuery` constructs a JSON query object from a survival time, a disease, and optional sets of genes and drugs.

In [19]:
def buildQuery(st, disease, genes=None, drugs=None):
    """Build a CHP query message for a survival-time probability question.

    The query graph encodes the probabilistic question
    P(survival_time >= st | Mut_g1 = True, ..., Mut_gn = True,
                            Drug = d1, ..., Drug = dn).

    Nodes are numbered 'n0', 'n1', ... in this order: genes, drugs, the
    disease, and finally the survival-time phenotype. Edges are numbered
    'e0', 'e1', ...: one edge from every gene/drug node to the disease node,
    followed by one edge from the disease node to the phenotype node.

    Args:
        st: survival-time threshold; stored as the 'days' property of the
            disease-to-phenotype edge.
        disease: (name, curie) pair; only the curie (index 1) is used.
        genes: optional iterable of (name, curie) pairs; only the curie
            (index 1) is used. None or empty means no gene evidence.
        drugs: optional iterable of (name, curie) pairs; only the curie
            (index 1) is used. None or empty means no drug evidence.

    Returns:
        A dict with the populated 'query_graph', an empty 'knowledge_graph'
        and a 'results' list holding one empty binding record.
    """
    # None (not a shared mutable list) is the "no evidence" default.
    genes = [] if genes is None else genes
    drugs = [] if drugs is None else drugs

    nodes = {}
    edges = {}

    # Add the evidence nodes (genes first, then drugs) and remember which
    # edge type each one needs; the edges themselves can only be created
    # once the disease node id is known.
    evidence_links = []
    evidence_groups = (
        ('gene', 'gene_to_disease_association', genes),
        ('drug', 'chemical_to_disease_or_phenotypic_feature_association', drugs),
    )
    for node_type, edge_type, items in evidence_groups:
        for item in items:
            node_id = 'n{}'.format(len(nodes))
            nodes[node_id] = {'type': node_type,
                              'curie': '{}'.format(item[1])}
            evidence_links.append((node_id, edge_type))

    # Add the disease node.
    disease_id = 'n{}'.format(len(nodes))
    nodes[disease_id] = {'type': 'disease',
                         'curie': '{}'.format(disease[1])}

    # Link all evidence to the disease.
    for node_id, edge_type in evidence_links:
        edges['e{}'.format(len(edges))] = {'type': edge_type,
                                           'source_id': node_id,
                                           'target_id': disease_id}

    # Add the target survival node.
    phenotype = ('Survival_Time', 'EFO:0000714')
    phenotype_id = 'n{}'.format(len(nodes))
    nodes[phenotype_id] = {'type': 'phenotypicfeature',
                           'curie': '{}'.format(phenotype[1])}

    # Link the disease to the target.
    # 'properties' is optional - if not specified the default days is 970 and
    # the default qualifier is '>='. Add 'contributions': True to the
    # properties to include contributions in the response.
    # NOTE(review): the /predicates output lists
    # 'disease_to_phenotypic_association' while this edge type is
    # 'disease_to_phenotype_association' - confirm which spelling the
    # service expects.
    edges['e{}'.format(len(edges))] = {'type': 'disease_to_phenotype_association',
                                       'source_id': disease_id,
                                       'target_id': phenotype_id,
                                       'properties': {'qualifier': '>=',
                                                      'days': st}}

    return {'query_graph': {'edges': edges,
                            'nodes': nodes},
            # the knowledge graph and result bindings are returned empty
            'knowledge_graph': {'edges': dict(),
                                'nodes': dict()},
            'results': [{'node_bindings': dict(),
                         'edge_bindings': dict()}]}

# Read Genes and Drugs
Functionality to read in our set of available genes and drugs with their respective Ensembl and ChEMBL CURIE IDs.

In [20]:
def readGenes():
    """Read the available genes from 'gene_curie_map.csv'.

    The first row of the file is treated as a header and skipped. Blank
    lines are ignored, and an empty file yields an empty list.

    Returns:
        A list of (row[0], row[1]) tuples, one per data row, in file order.
    """
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation recommends for files passed to csv.reader.
    with open('gene_curie_map.csv', 'r', newline='') as gene_file:
        reader = csv.reader(gene_file)
        next(reader, None)  # skip the header; tolerate an empty file
        # csv.reader yields [] for blank lines; skip those rows
        return [(row[0], row[1]) for row in reader if row]

In [21]:
def readDrugs():
    """Read the available drugs from 'drug_curie_map.csv'.

    The first row of the file is treated as a header and skipped. Blank
    lines are ignored, and an empty file yields an empty list.

    Returns:
        A list of (row[0], row[1]) tuples, one per data row, in file order.
    """
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation recommends for files passed to csv.reader.
    with open('drug_curie_map.csv', 'r', newline='') as drug_file:
        reader = csv.reader(drug_file)
        next(reader, None)  # skip the header; tolerate an empty file
        # csv.reader yields [] for blank lines; skip those rows
        return [(row[0], row[1]) for row in reader if row]

# Constructing the Query and pinging CHP
You can use the commented out functionality to check which genes and drugs are available. Genes and drugs are passed in as a list of tuples. Currently only breast cancer can be used as the disease.

In [22]:
# list of genes (and curies) we can query over
#gene_list = readGenes()

# list of drugs (and curies) we can query over
#drug_list = readDrugs()

# or pull from above lists
genes = [('RAF1', 'ENSEMBL:ENSG00000132155'),
         ('MAP3K13','ENSEMBL:ENSG00000073803')]
drugs = [('CYCLOPHOSPHAMIDE', 'CHEMBL:CHEMBL88')]
disease = ('Breast_Cancer', 'MONDO:0007254')
survival_time = 970
# if genes or drugs is an empty list you can omit them
query = buildQuery(survival_time, disease, genes=genes, drugs=drugs)
payload = {'message': query}
# The timeout keeps the cell from hanging forever if the server is
# unreachable (raise it if queries legitimately take longer), and
# raise_for_status() reports HTTP errors here rather than as a confusing
# failure when the response is parsed later.
r = requests.post('http://chp.thayer.dartmouth.edu/query/', json=payload, timeout=120)
r.raise_for_status()

# Extract end probability

In [23]:
# Parse the CHP response and pull the survival probability off the
# disease-to-phenotype edge of the returned knowledge graph.
chp_res = r.json()
KG = chp_res['message']['knowledge_graph']
p_survival = None
for edge in KG['edges'].values():
    if edge['type'] == 'disease_to_phenotype_association':
        p_survival = edge['has_confidence_level']

# Fail with a clear message instead of a NameError when the response holds
# no matching edge.
if p_survival is None:
    raise ValueError("no 'disease_to_phenotype_association' edge with a "
                     "confidence level found in the returned knowledge graph")

# probability of survival given QG specification
print("Probability of survival > {} days is:".format(survival_time), p_survival)

Probability of survival > 970 days is: 0.9938446232526468
