### Import api_crawler

[Code for api_crawler](https://github.com/biothings/JSON-LD_BioThings_API_DEMO/blob/master/src/api_crawler.py)

In [1]:
from api_crawler import uri_query

### Given a variant HGVS ID, look up the NCBI gene ID related to it

In [2]:
# Resolve the HGVS variant id to the NCBI gene id(s) linked to it.
uri_query(
    input_value='chr12:g.103234255C>T',
    input_type='http://identifiers.org/hgvs/',
    output_type='http://identifiers.org/ncbigene/',
)

1 api(s) could be utilized based on the input_type, output_type you provided: ['myvariant.info']
myvariant.info returns the following results based on your query: 5053


[{'myvariant.info': '5053'}]

### From the NCBI gene ID found in the previous query, get all related WikiPathways IDs

In [3]:
# Resolve the NCBI gene id to the WikiPathways id(s) linked to it.
uri_query(
    input_value='5053',
    input_type='http://identifiers.org/ncbigene/',
    output_type='http://identifiers.org/wikipathways/',
)

1 api(s) could be utilized based on the input_type, output_type you provided: ['mygene.info']
mygene.info returns the following results based on your query: ['WP706', 'WP550']


[{'mygene.info': ['WP706', 'WP550']}]

### Breakdown of uri_query function

The following code shows each step involved in the `uri_query` function demonstrated above.

- [Metadata information about BioThings APIs (config)](https://github.com/biothings/JSON-LD_BioThings_API_DEMO/blob/master/src/config.py)
- [Code for biothings_helper](https://github.com/biothings/JSON-LD_BioThings_API_DEMO/blob/master/src/biothings_helper.py)

#### Step 1: Specify input and output

In [4]:
# Query parameters reused by every later step: the raw identifier plus the
# URI prefixes naming the identifier type we start from and the one we want.
input_value = '5053'
input_type = 'http://identifiers.org/ncbigene/'
output_type = 'http://identifiers.org/wikipathways/'

#### Step 2: Iterate through the API metadata and find the corresponding API based on the input and output types

In [5]:
from config import AVAILABLE_API_SOURCES
from biothings_helper import find_id_from_uri, find_value_from_output_type, query_ids_from_output_type
from api_crawler import api_lookup

In [6]:
# Translate both URI prefixes into the internal identifier names, input first.
input_name, output_name = map(find_id_from_uri, (input_type, output_type))
# Ask the API metadata which API(s) can go from input_name to output_name.
api_results = api_lookup(input_name, output_name)
print(api_results)

[{'type': 'annotate', 'api': 'mygene.info'}]


#### Step 3: Make API call

In [20]:
# Construct the URL from the metadata of the first matching API: the '*' in its
# 'annotate_syntax' template is replaced with the input value.
api_name = api_results[0]['api']
url = AVAILABLE_API_SOURCES[api_name]['annotate_syntax'].replace('*', input_value)
# Make the API call.
import requests
# Without a timeout, requests waits indefinitely on an unresponsive server and
# the cell would hang; 30 seconds is a generous upper bound for one lookup.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (4xx/5xx) instead of trying to parse an error
# page as the annotation document.
response.raise_for_status()
doc = response.json()
print(doc)

{'exac.all.exp_mis': 147.808010031, 'interpro.desc': ['Aromatic amino acid hydroxylase, iron/copper binding site', 'ACT domain', 'Aromatic amino acid hydroxylase', 'Aromatic amino acid hydroxylase, C-terminal', 'Tyrosine 3-monooxygenase-like', 'Phenylalanine-4-hydroxylase, tetrameric form'], 'exac.nonpsych.exp_syn': 53.1021768402, 'ipi': ['IPI00017579', 'IPI01022545', 'IPI01022733'], 'reagent.GNF_mm-kinase_plasmid-shRNA.relationship': 'is', 'exac.nonpsych.lof_z': 0.93401150084635, 'refseq.protein': ['NP_000268.1', 'XP_016874859.1'], 'exac.all.n_mis': 186.0, 'exac.all.p_null': 0.692551334801337, 'accession.translation.protein': ['ADQ32100.1', 'AAD13926.1', 'AAA60082.1', 'AAA80910.1', 'AAD13928.1', 'NP_000268.1', 'BAG36181.1', 'BAG60644.1', 'XP_016874859.1', 'AAH26251.1', 'AAC51772.1'], 'exons_hg19.transcript': 'NM_000277', 'exac.all.mu_syn': 4.7550810356e-06, 'exac.nonpsych.mu_lof': 1.53695791301e-06, 'prosite': ['PS51410', 'PS51671'], 'exac.all.exp_lof': 22.7041593659, 'refseq.genomic'

#### Step 4: Transform the JSON doc into a JSON-LD doc, then into N-Quads format

In [21]:
# Load the context file. The `with` block closes the file handle as soon as
# parsing is done, and the explicit encoding avoids depending on the
# platform's default when reading the JSON text.
import json
with open('context/mygene_context.json', encoding='utf-8') as context_file:
    context = json.load(context_file)
# Construct the JSON-LD doc by merging the context file's top-level keys into
# the API response (presumably a single '@context' entry -- verify against the
# context file). Note: this updates `doc` in place.
doc.update(context)
# Transform the JSON-LD doc to N-Quads text, then parse that text back into a
# list of {'subject', 'predicate', 'object'} dicts for the default graph.
from pyld import jsonld
t = jsonld.JsonLdProcessor()
nquads = t.parse_nquads(jsonld.to_rdf(doc, {'format': 'application/nquads'}))['@default']
print(nquads)

[{'subject': {'type': 'blank node', 'value': '_:b0'}, 'predicate': {'type': 'IRI', 'value': 'http://identifiers.org/ensembl.gene/'}, 'object': {'type': 'literal', 'value': 'ENSG00000171759', 'datatype': 'http://www.w3.org/2001/XMLSchema#string'}}, {'subject': {'type': 'blank node', 'value': '_:b0'}, 'predicate': {'type': 'IRI', 'value': 'http://identifiers.org/ensembl.protein/'}, 'object': {'type': 'literal', 'value': 'ENSP00000303500', 'datatype': 'http://www.w3.org/2001/XMLSchema#string'}}, {'subject': {'type': 'blank node', 'value': '_:b0'}, 'predicate': {'type': 'IRI', 'value': 'http://identifiers.org/ensembl.protein/'}, 'object': {'type': 'literal', 'value': 'ENSP00000446658', 'datatype': 'http://www.w3.org/2001/XMLSchema#string'}}, {'subject': {'type': 'blank node', 'value': '_:b0'}, 'predicate': {'type': 'IRI', 'value': 'http://identifiers.org/ensembl.protein/'}, 'object': {'type': 'literal', 'value': 'ENSP00000447620', 'datatype': 'http://www.w3.org/2001/XMLSchema#string'}}, {'

#### Step 5: Fetch the values for the output URI from the N-Quads

In [19]:
# Keep the object value of every quad whose predicate IRI equals the requested
# output type.
value_list = [
    item['object']['value']
    for item in nquads
    if item['predicate']['value'] == output_type
]
# Drop duplicates (going through a set does not guarantee any ordering).
value = list(set(value_list))
print(value)

['WP706', 'WP550']
