# Setup

In [1]:
import json
import requests
import copy
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    """Pretty-print ``j`` to stdout as JSON, indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Alias for ``printjson``: forwards ``j`` to it unchanged."""
    printjson(j)

In [4]:
def post(name,url,message,params=None):
    """POST ``message`` as a JSON body to ``url`` and return the decoded JSON reply.

    Args:
        name: Short label for the service; used only in the error printout.
        url: Endpoint to post to.
        message: JSON-serialisable payload sent as the request body.
        params: Optional query-string parameters; ``None`` sends none.

    Returns:
        The decoded JSON response when the status code is 200. For any other
        status the problem is printed and an empty dict is returned.
    """
    # requests treats params=None exactly like an omitted argument, so a
    # single call covers both the "with params" and "without params" cases.
    response = requests.post(url, json=message, params=params)
    if response.status_code != 200:
        print(name, 'error:', response.status_code)
        print(response)
        # Error bodies are often HTML or empty. Decoding them as JSON must not
        # turn an already-reported failure into an exception.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        return {}
    return response.json()

In [5]:
def coalesce(em):
    """Send ``em`` to the Answer Coalesce ``coalesce/all`` endpoint via ``post``.

    Returns whatever ``post`` returns for that request.
    """
    endpoint = 'https://answercoalesce.renci.org/1.1/coalesce/all'
    return post('coalesce', endpoint, em)

In [6]:
def normalize(ids):
    """Map CURIEs to their preferred identifiers via the Node Normalization service.

    Args:
        ids: List of CURIE strings to look up.

    Returns:
        List of preferred identifiers, in the order the service reports them.
        CURIEs the service could not normalise are skipped (and reported on
        stdout), so the list may be shorter than ``ids``. When the request
        itself fails, ``post`` yields ``{}`` and an empty list is returned.
    """
    payload = {'curies': ids}
    result = post('nn','https://nodenormalization-sri.renci.org/1.1/get_normalized_nodes',payload)
    # The service answers with null for a CURIE it does not recognise. Indexing
    # such an entry would raise TypeError, so those CURIEs are set aside.
    unmatched = [curie for curie, entry in result.items() if entry is None]
    if unmatched:
        print('nn could not normalize:', unmatched)
    return [entry['id']['identifier'] for entry in result.values() if entry is not None]

In [23]:
def enrich(biolink_category, identifiers):
    """Run Answer Coalesce over a set of identifiers that share one category.

    The identifiers are normalised first, then packed into a message holding a
    one-node query graph (``n1``), one knowledge-graph node per identifier and
    one result per identifier that binds it to ``n1``. The message is passed to
    ``coalesce`` and its reply is returned as-is.
    """
    curies = normalize(identifiers)

    # NOTE(review): the query graph uses 'categories' while the knowledge-graph
    # nodes use 'category' - confirm which key the 1.1 endpoint expects.
    kg_nodes = {curie: {'category': [biolink_category]} for curie in curies}
    bindings = [
        {'node_bindings': {'n1': [{"id": curie}]}, 'edge_bindings': {}}
        for curie in curies
    ]
    query_graph = {
        "nodes": {"n1": {"categories": [biolink_category]}},
        "edges": {},
    }
    envelope = {
        'message': {
            'query_graph': query_graph,
            'knowledge_graph': {'nodes': kg_nodes, 'edges': {}},
            'results': bindings,
        }
    }
    return coalesce(envelope)

In [18]:
def ac_to_table(aragorn_result,mnode):
    """Summarise each result of a coalesced message as one row of a DataFrame.

    Args:
        aragorn_result: Message dict whose results live under
            ``['message']['results']``.
        mnode: Query-node key whose bindings are counted and inspected.

    Returns:
        DataFrame with one row per result and the columns ``N_Answer_Nodes``
        (number of bound query nodes), ``N_Merged_Nodes`` (number of bindings
        for ``mnode``) and ``Method`` (the ``coalescence_method`` of the first
        ``mnode`` binding, or ``'Original'`` when there is no such binding or
        it carries no method).
    """
    answer_node_count = []
    merged_count = []
    method = []
    for result in aragorn_result['message']['results']:
        node_bindings = result['node_bindings']
        merged = node_bindings[mnode]
        answer_node_count.append(len(node_bindings))
        merged_count.append(len(merged))
        # Only a missing key or an empty binding list means "not coalesced".
        # Any other failure is a real problem and must surface, not be hidden.
        try:
            method.append(merged[0]['coalescence_method'])
        except (KeyError, IndexError):
            method.append('Original')
    return pd.DataFrame({'N_Answer_Nodes':answer_node_count, 'N_Merged_Nodes':merged_count, 'Method':method})

def filter_to_simple(aragorn_result,mnode):
    """Return a deep copy of ``aragorn_result`` holding only un-coalesced results.

    A result is kept when the first binding of ``mnode`` has no
    ``coalescence_method`` key. The number of kept results is printed.

    Args:
        aragorn_result: Message dict with results under ``['message']['results']``.
        mnode: Query-node key whose first binding is inspected.

    Returns:
        An independent copy of the input whose results list contains only the
        kept results; the input is left untouched.
    """
    simple_result = copy.deepcopy(aragorn_result)
    # Filter the copied results rather than the originals, so that the returned
    # message shares no mutable objects with the input.
    simple_result['message']['results'] = [
        result for result in simple_result['message']['results']
        if 'coalescence_method' not in result['node_bindings'][mnode][0]
    ]
    print(len(simple_result['message']['results']))
    return simple_result

def print_nodenames(simple_result,qnode):
    """Print, one per line, the name of the node bound first to ``qnode`` in each result.

    The identifier of the first ``qnode`` binding is looked up in the message's
    knowledge graph, and that node's ``name`` is printed.
    """
    message = simple_result['message']
    for answer in message['results']:
        bound_id = answer['node_bindings'][qnode][0]['id']
        # The knowledge graph is only consulted when there is a result to resolve.
        kg_node = message['knowledge_graph']['nodes'][bound_id]
        print(kg_node['name'])

def filter_to_coal(aragorn_result,mnode,method):
    """Return a deep copy of ``aragorn_result`` holding only results coalesced by ``method``.

    A result is kept when the first binding of ``mnode`` has a
    ``coalescence_method`` key equal to ``method``. The number of kept results
    is printed.

    Args:
        aragorn_result: Message dict with results under ``['message']['results']``.
        mnode: Query-node key whose first binding is inspected.
        method: Coalescence method name to match.

    Returns:
        An independent copy of the input whose results list contains only the
        matching results; the input is left untouched.
    """
    simple_result = copy.deepcopy(aragorn_result)

    def _coalesced_by_method(result):
        # The key must be present: a plain .get() would let method=None match
        # results that were never coalesced.
        first_binding = result['node_bindings'][mnode][0]
        return ('coalescence_method' in first_binding
                and first_binding['coalescence_method'] == method)

    # Filter the copied results rather than the originals, so that the returned
    # message shares no mutable objects with the input.
    kept = [result for result in simple_result['message']['results']
            if _coalesced_by_method(result)]
    print(len(kept))
    simple_result['message']['results'] = kept
    return simple_result

def filter_to_gc(aragorn_result,mnode):
    """Keep only the results coalesced by the ``'graph_enrichment'`` method."""
    return filter_to_coal(aragorn_result, mnode, method='graph_enrichment')

def filter_to_pc(aragorn_result,mnode):
    """Keep only the results coalesced by the ``'property_enrichment'`` method."""
    return filter_to_coal(aragorn_result, mnode, method='property_enrichment')

In [19]:
def print_gc_result(graph,gc_result,qnode='n1',qedge='extra_qe_0'):
    """Print the p-value of one coalesced result and the edges bound to it.

    Args:
        graph: Knowledge graph dict with ``'nodes'`` and ``'edges'`` mappings.
        gc_result: A single result with ``'node_bindings'`` and ``'edge_bindings'``.
        qnode: Query-node key whose first binding carries ``p_value``.
            Defaults to ``'n1'``, the key that was previously hard-coded.
        qedge: Edge-binding key listing the edges to print. Defaults to
            ``'extra_qe_0'``, the key that was previously hard-coded.

    Prints ``p_value: <value>`` followed by one
    ``<subject name> -[<predicate>]-> <object name>`` line per bound edge.
    """
    print('p_value:', gc_result['node_bindings'][qnode][0]['p_value'])
    for eb in gc_result['edge_bindings'][qedge]:
        # Edge bindings hold only ids; the edge itself lives in the graph.
        kge = graph['edges'][eb['id']]
        subject_name = graph['nodes'][kge['subject']]['name']
        object_name = graph['nodes'][kge['object']]['name']
        print(f"{subject_name} -[{kge['predicate']}]-> {object_name}")

Here's the actual analysis: enrich a list of genes, then inspect the graph-enrichment results.

In [20]:
# Enrich a set of genes given as NCBIGene CURIEs. enrich() normalises the ids
# and posts them to the coalesce service, so this cell needs network access.
answer = enrich('biolink:Gene',['NCBIGene:191',
 'NCBIGene:55832',
 'NCBIGene:645',
 'NCBIGene:54884',
 'NCBIGene:8239',
 'NCBIGene:4175',
 'NCBIGene:10469',
 'NCBIGene:8120',
 'NCBIGene:3840',
 'NCBIGene:55705',
 'NCBIGene:2597',
 'NCBIGene:23066',
 'NCBIGene:7514',
 'NCBIGene:10128'])

In [21]:
# One row per result: bound-node count, number of n1 bindings, and coalescence method.
ac_to_table(answer,'n1')

Unnamed: 0,N_Answer_Nodes,N_Merged_Nodes,Method
0,2,8,graph_enrichment
1,2,8,graph_enrichment
2,2,10,graph_enrichment
3,2,12,graph_enrichment
4,2,12,graph_enrichment
5,2,14,graph_enrichment
6,2,9,graph_enrichment
7,2,9,graph_enrichment
8,2,9,graph_enrichment
9,2,5,graph_enrichment


In [22]:
# Keep only results whose first n1 binding was coalesced by 'graph_enrichment';
# the call prints how many results remain.
x = filter_to_gc(answer,'n1')

15


In [52]:
# For every graph-enrichment result, print its index, its p-value and the
# knowledge-graph edges bound to it.
for i,result in enumerate(x['message']['results']):
    print('Result',i)
    print_gc_result(x['message']['knowledge_graph'],result)

Result 0
p_value: 1.9150723523368846e-15
blood -[biolink:expresses]-> TIMM44
blood -[biolink:expresses]-> RETSAT
blood -[biolink:expresses]-> LRPPRC
blood -[biolink:expresses]-> GAPDH
blood -[biolink:expresses]-> BLVRB
blood -[biolink:expresses]-> XPO1
blood -[biolink:expresses]-> MCM6
blood -[biolink:expresses]-> USP9X
blood -[biolink:expresses]-> AP3B2
blood -[biolink:expresses]-> CAND2
blood -[biolink:expresses]-> AHCY
blood -[biolink:expresses]-> IPO9
blood -[biolink:expresses]-> KPNA4
Result 1
p_value: 2.3920933258387157e-15
medulla oblongata -[biolink:expresses]-> GAPDH
medulla oblongata -[biolink:expresses]-> KPNA4
medulla oblongata -[biolink:expresses]-> USP9X
medulla oblongata -[biolink:expresses]-> RETSAT
medulla oblongata -[biolink:expresses]-> TIMM44
medulla oblongata -[biolink:expresses]-> LRPPRC
medulla oblongata -[biolink:expresses]-> CAND2
medulla oblongata -[biolink:expresses]-> BLVRB
medulla oblongata -[biolink:expresses]-> MCM6
medulla oblongata -[biolink:expresses]-