In [6]:
# imports
import csv 
import pandas as pd 
import os
import requests 
import time
import logging
import json


In [13]:
# constants
# gene/disease qualifier filter, passed to filter_edges_by_qualifier:
#   subject (disease) -> decreased severity
#   object (gene)     -> decreased activity
list_gene_qualifiers = [
    {"qualifier_type_id": qualifier_type, "qualifier_value": qualifier_value}
    for qualifier_type, qualifier_value in (
        ("object_aspect_qualifier", "activity"),
        ("object_direction_qualifier", "decreased"),
        ("subject_aspect_qualifier", "severity"),
        ("subject_direction_qualifier", "decreased"),
    )
]
# molecular qualifiers (none defined yet)

# trapi constants: biolink categories and the predicate used when building queries
type_gene, type_disease, type_molecule = 'biolink:Gene', 'biolink:Disease', 'biolink:SmallMolecule'
type_predicate = 'biolink:related_to'

# servers
url_genepro = "https://translator.broadinstitute.org/genetics_provider/trapi/v1.3/query"

In [18]:
# variables
# disease to query; keep exactly one curie_disease assignment active
# curie_disease = 'MONDO:0021765' # radiculitis
curie_disease = 'MONDO:0004975' # alzheimer's


In [50]:
# __main__
# one hop query against the genetics kp: selected disease -> genes
map_gen_nodes, map_gen_edges = get_nodes_edges_one_hop(
    url=url_genepro,
    list_source=[curie_disease],
    list_target=[],
    list_source_categories=[type_disease],
    list_target_categories=[type_gene],
    list_predicates=[type_predicate],
    log=True,
)

build trapi payload: 
{
    "message": {
        "query_graph": {
            "edges": {
                "e00": {
                    "subject": "n00",
                    "object": "n01",
                    "predicates": [
                        "biolink:related_to"
                    ]
                }
            },
            "nodes": {
                "n00": {
                    "ids": [
                        "MONDO:0004975"
                    ],
                    "categories": [
                        "biolink:Disease"
                    ]
                },
                "n01": {
                    "categories": [
                        "biolink:Gene"
                    ]
                }
            }
        }
    }
}
query: https://translator.broadinstitute.org/genetics_provider/trapi/v1.3/query
got results from: https://translator.broadinstitute.org/genetics_provider/trapi/v1.3/query
got node count: 423 and edge_count: 450


In [51]:
# filter the genetics KP results based on the qualifiers
# querying for subject disease decreased severity
# looking object gene decreased activity
# input is the edge map returned by the genetics kp query cell; the result keeps the same key -> edge layout
map_gen_filtered_edges = filter_edges_by_qualifier(map_gen_edges, list_gene_qualifiers)
# report how many edges survived the filter
print("for genetics filtered edges rowcount: {}".format(len(map_gen_filtered_edges)))




for genetics filtered edges rowcount: 0


In [None]:
# get the genes from the filtered genetics kp edges


In [39]:
# methods - trapi
def get_nodes_edges_one_hop(url, list_source, list_target, list_source_categories, list_target_categories, list_predicates, log=False):
    ''' method to query a trapi url and get the resulting knowledge graph nodes and edges back

    Args:
        url: trapi query endpoint to post to
        list_source: ids for the subject query node (may be empty)
        list_target: ids for the object query node (may be empty)
        list_source_categories: categories for the subject query node
        list_target_categories: categories for the object query node
        list_predicates: predicates for the query edge (may be empty)
        log: when True, print the payload and the resulting node/edge counts

    Returns:
        tuple (map_nodes, map_edges) taken from message.knowledge_graph of the response;
        each is an empty dict when the response omits it or sets it to null
    '''

    # query
    json_response = query_one_hop(url, list_source, list_target, list_source_categories, list_target_categories, list_predicates, log)

    # a response may omit (or null out) the message or its knowledge graph;
    # fall back to empty maps instead of failing on None.get(...) / len(None)
    message = (json_response or {}).get("message") or {}
    knowledge_graph = message.get("knowledge_graph") or {}
    map_nodes = knowledge_graph.get("nodes") or {}
    map_edges = knowledge_graph.get("edges") or {}

    # log
    if log:
        print("got node count: {} and edge_count: {}".format(len(map_nodes), len(map_edges)))

    # return
    return map_nodes, map_edges

def query_one_hop(url, list_source, list_target, list_source_categories, list_target_categories, list_predicates, log=False):
    ''' method to call a trapi url with a one hop query and return the decoded json response

    Args:
        url: trapi query endpoint to post to
        list_source: ids for the subject query node (may be empty)
        list_target: ids for the object query node (may be empty)
        list_source_categories: categories for the subject query node
        list_target_categories: categories for the object query node
        list_predicates: predicates for the query edge (may be empty)
        log: forwarded to build_one_hop_payload, which prints the payload when True

    Returns:
        the response body decoded from json

    Raises:
        requests.HTTPError: when the server answers with a 4xx/5xx status
    '''
    # build the payload
    payload = build_one_hop_payload(list_source, list_target, list_source_categories, list_target_categories, list_predicates, log=log)

    # call the url
    print("query: {}".format(url))
    response = requests.post(url, json=payload)

    # fail here on an http error rather than returning an error body as if it were a result
    response.raise_for_status()
    output_json = response.json()
    print("got results from: {}".format(url))

    # return the json
    return output_json

def build_one_hop_payload(list_source, list_target, list_source_categories, list_target_categories, list_predicates, log=False):
    ''' method to build a one hop json payload for a trapi query

    Args:
        list_source: ids for the subject query node "n00" (may be empty)
        list_target: ids for the object query node "n01" (may be empty)
        list_source_categories: categories for "n00"
        list_target_categories: categories for "n01"
        list_predicates: predicates for edge "e00"; the key is left out when empty
        log: when True, print the payload as indented json

    Returns:
        dict of the form {"message": {"query_graph": {"edges": ..., "nodes": ...}}}
    '''
    # build the two query nodes, forwarding the caller's log flag to the node builder
    nodes = {
        "n00": build_trapi_query_node(list_source, list_source_categories, log=log),
        "n01": build_trapi_query_node(list_target, list_target_categories, log=log),
    }

    # single edge from n00 to n01; predicates are optional
    edge = {"subject": "n00", "object": "n01"}
    if list_predicates:
        edge["predicates"] = list_predicates

    payload = {"message": {"query_graph": {"edges": {"e00": edge}, "nodes": nodes}}}

    # log
    if log:
        print("build trapi payload: \n{}".format(json.dumps(payload, indent=4)))

    # return
    return payload

def build_trapi_query_node(list_source, list_source_categories, log=False):
    ''' method to build a trapi query node from optional ids and categories

    the 'ids' and 'categories' keys are only set when the matching list is non-empty;
    the log flag is accepted but not used
    '''
    # candidate (key, values) pairs, kept in the order they appear in the node
    optional_fields = (
        ('ids', list_source),
        ('categories', list_source_categories),
    )

    # keep only the pairs that actually carry values
    return {
        field_name: field_values
        for field_name, field_values in optional_fields
        if field_values and len(field_values) > 0
    }

In [48]:
# methods
# method to take edges and return only those with specified qualifiers
def filter_edges_by_qualifier(map_edges, list_qualifiers, log=False):
  """take edges and return only those with specified qualifiers

  Args:
    map_edges: dict of edge key -> edge dict; an edge may carry a 'qualifiers' list
    list_qualifiers: qualifier dicts that must all be present on an edge for it to be kept
    log: accepted for signature consistency with the other methods; not used

  Returns:
    new dict holding the key -> edge entries whose 'qualifiers' contain every
    entry of list_qualifiers; edges with no (or empty) qualifiers are never kept
  """
  map_filtered_edges = {}

  # for each edge in edges map, get the qualifiers list
  for key, value in map_edges.items():
    edge_qualifiers = value.get('qualifiers')

    # compare against the qualifiers passed in by the caller, not a notebook global
    if edge_qualifiers and is_subset(list_qualifiers, edge_qualifiers):
      map_filtered_edges[key] = value

  # return
  return map_filtered_edges

# compare qualifiers
def is_subset(list_one, list_two):
  '''returns true if every row of list 1 has an equal row in list 2'''
  print("comparing list 1: {}".format(list_one))
  print("comparing list 2: {}".format(list_two))

  # a row of list 1 is covered when at least one row of list 2 compares equal to it;
  # the scan stops at the first uncovered row
  return all(
    any(wanted_row == candidate_row for candidate_row in list_two)
    for wanted_row in list_one
  )

# method to return nodes with qualifiers (map gene, chem, qualifiers)


In [35]:
# scratch code
# quick check of is_subset: a one-entry list holding a qualifier that also appears in list_gene_qualifiers
list_test = [{"qualifier_type_id": "object_direction_qualifier", "qualifier_value": "decreased"}]
result = is_subset(list_test, list_gene_qualifiers)
# bare expression so the notebook displays the value
result


# {"qualifier_type_id": "object_direction_qualifier", "qualifier_value": "decreased"} == {"qualifier_type_id": "object_direction_qualifier", "qualifier_value": "decreased"}

True

Phenotypes that we loaded that ameliorate disease:


```
+-----+---------------+----------------------+
| num | curie         | name                 |
+-----+---------------+----------------------+
|   1 | MONDO:0004242 | active peptic ulcer  |
|   4 | MONDO:0004781 | acute myocardial inf |
|   2 | MONDO:0002280 | anemia               |
|   3 | MONDO:0002519 | anus disease         |
|   5 | EFO:0006788   | anxiety disorder     |
|   2 | MONDO:0004979 | asthma               |
|   4 | MONDO:0004981 | atrial fibrillation  |
|   1 | MONDO:0000465 | atrioventricular blo |
|   1 | MONDO:0005229 | bacterial infectious |
|   3 | EFO:0000284   | benign prostatic hyp |
|   1 | MONDO:0004985 | bipolar disorder     |
|   1 | EFO:0004138   | bundle branch block  |
|   5 | EFO:0004269   | cardiac arrhythmia   |
|   2 | MONDO:0004995 | cardiovascular disea |
|   1 | MONDO:0005129 | cataract             |
|   2 | EFO:0003763   | cerebrovascular diso |
|   2 | MONDO:0012672 | cholelithiasis       |
|   1 | EFO:0009538   | chronic inflammatory |
|   1 | MONDO:0005300 | chronic kidney disea |
|   2 | MONDO:0005009 | congestive heart fai |
|   1 | MONDO:0004596 | cor pulmonale        |
|   1 | MONDO:0002175 | degeneration of macu |
|   2 | MONDO:0002040 | dermatomycosis       |
|   6 | MONDO:0005015 | diabetes mellitus    |
|   2 | EFO:0009959   | diverticular disease |
|   1 | EFO:0009666   | enthesopathy         |
|   1 | EFO:1000695   | erythematosquamous d |
|   3 | MONDO:0003749 | esophageal disease   |
|   1 | MONDO:0001409 | esophagitis          |
|   1 | MONDO:0004830 | fasciitis            |
|   2 | EFO:0009523   | fecal incontinence   |
|   3 | EFO:0009549   | female reproductive  |
|   1 | MONDO:0015978 | functional neutrophi |
|   1 | EFO:0000217   | gastritis            |
|   1 | MONDO:0005041 | glaucoma             |
|   3 | EFO:0009550   | headache disorder    |
|   4 | MONDO:0005267 | heart disease        |
|   1 | EFO:0005803   | hematologic disease  |
|   1 | MONDO:0005161 | human papilloma viru |
|  16 | MONDO:0005044 | hypertensive disorde |
|   5 | MONDO:0005420 | hypothyroidism       |
|   2 | MONDO:0005046 | immune system diseas |
|   1 | MONDO:0005265 | inflammatory bowel d |
|   2 | MONDO:0001356 | iron deficiency anem |
|  10 | EFO:0001425   | ischemic cardiomyopa |
|   2 | EFO:1000999   | joint disease        |
|   1 | MONDO:0001106 | kidney failure       |
|   2 | EFO:0001421   | liver disease        |
|   1 | MONDO:0008903 | lung cancer          |
|   2 | MONDO:0002013 | lymphangioma         |
|   1 | MONDO:0005277 | migraine disorder    |
|   2 | MONDO:0000226 | mineral metabolism d |
|  10 | MONDO:0005371 | mood disorder        |
|   3 | EFO:0000618   | nervous system disea |
|   1 | MONDO:0001658 | nontoxic goiter      |
|   2 | MONDO:0011122 | obesity disorder     |
|   5 | MONDO:0002267 | obstructive lung dis |
|   3 | EFO:0003918   | obstructive sleep ap |
|   6 | MONDO:0005178 | osteoarthritis       |
|   1 | EFO:0009493   | paroxysmal tachycard |
|   3 | MONDO:0004247 | peptic ulcer disease |
|   1 | EFO:0009816   | perineal laceration  |
|   1 | EFO:0003875   | peripheral vascular  |
|   1 | MONDO:0005906 | peritonsillar absces |
|   1 | MONDO:0000594 | pervasive developmen |
|   3 | EFO:0003106   | pneumonia            |
|   1 | EFO:0009275   | premature cardiac co |
|   1 | MONDO:0021765 | radiculitis          |
|   2 | EFO:0009686   | respiratory failure  |
|   1 | MONDO:0000376 | respiratory system c |
|   2 | EFO:0003839   | retinopathy          |
|   2 | MONDO:0005324 | seasonal allergic rh |
|   3 | EFO:1000763   | sebaceous gland dise |
|   2 | EFO:0000701   | skin disease         |
|   2 | EFO:0004198   | skin neoplasm        |
|   1 | EFO:0003877   | sleep apnea          |
|   2 | EFO:0009608   | stomach disease      |
|   1 | EFO:1001216   | tooth disease        |
|   1 | MONDO:0002220 | tooth hard tissue di |
|   1 | MONDO:0021353 | tumor of uterus      |
|   1 | MONDO:0005147 | type 1 diabetes mell |
|   4 | MONDO:0005148 | type 2 diabetes mell |
|   1 | EFO:0009514   | upper extremity frac |
|   2 | MONDO:0002118 | urinary system disea |
|   1 | MONDO:0008638 | varicose disease     |
+-----+---------------+----------------------+
85 rows in set (1 min 50.19 sec)

```



In [49]:
# test map filtering
# single hand-written edge map (key -> edge) used to exercise filter_edges_by_qualifier;
# its 'qualifiers' list holds every entry of list_gene_qualifiers plus a 'qualified_predicate'
# entry, so the edge is expected to be kept by the filter
test_edge =                 {"600k_63416_12596MONDO:0021765NCBIGene:78987": {
                    "attributes": [
                        {
                            "attribute_source": "infores:genetics-data-provider",
                            "attribute_type_id": "biolink:aggregator_knowledge_source",
                            "description": "The Genetics Data Provider KP from NCATS Translator",
                            "value": "infores:genetics-data-provider",
                            "value_type_id": "biolink:InformationResource",
                            "value_url": "https://translator.broadinstitute.org/genetics_provider/trapi/v1.3"
                        },
                        {
                            "attribute_type_id": "biolink:p_value",
                            "original_attribute_name": "pValue",
                            "value": 0.000194
                        }
                    ],
                    "object": "NCBIGene:78987",
                    "predicate": "biolink:condition_associated_with_gene",
                    "qualifiers": [
                        {
                            "qualifier_type_id": "object_aspect_qualifier",
                            "qualifier_value": "activity"
                        },
                        {
                            "qualifier_type_id": "object_direction_qualifier",
                            "qualifier_value": "decreased"
                        },
                        {
                            "qualifier_type_id": "qualified_predicate",
                            "qualifier_value": "caused by"
                        },
                        {
                            "qualifier_type_id": "subject_aspect_qualifier",
                            "qualifier_value": "severity"
                        },
                        {
                            "qualifier_type_id": "subject_direction_qualifier",
                            "qualifier_value": "decreased"
                        }
                    ],
                    "subject": "MONDO:0021765"
                }}

# run the filter on the one-edge map and report how many edges were kept
map_edges = filter_edges_by_qualifier(test_edge, list_gene_qualifiers)
print("got filtered count of: {}".format(len(map_edges)))

comparing list 1: [{'qualifier_type_id': 'object_aspect_qualifier', 'qualifier_value': 'activity'}, {'qualifier_type_id': 'object_direction_qualifier', 'qualifier_value': 'decreased'}, {'qualifier_type_id': 'subject_aspect_qualifier', 'qualifier_value': 'severity'}, {'qualifier_type_id': 'subject_direction_qualifier', 'qualifier_value': 'decreased'}]
comparing list 2: [{'qualifier_type_id': 'object_aspect_qualifier', 'qualifier_value': 'activity'}, {'qualifier_type_id': 'object_direction_qualifier', 'qualifier_value': 'decreased'}, {'qualifier_type_id': 'qualified_predicate', 'qualifier_value': 'caused by'}, {'qualifier_type_id': 'subject_aspect_qualifier', 'qualifier_value': 'severity'}, {'qualifier_type_id': 'subject_direction_qualifier', 'qualifier_value': 'decreased'}]
got filtered count of: 1
