In [1]:
import json
from pyld import jsonld
import myvariant

In [2]:
# Create the MyVariant.info client; the queries below all go through this instance
mv = myvariant.MyVariantInfo()

In [3]:
# URIs that identify the top-level data sources; find_top_level_uri stops
# walking up the graph as soon as it meets one of these predicates.
top_level_uris = [
    "http://schema.myvariant.info/datasource/" + source_name
    for source_name in (
        "cadd",
        "clinvar",
        "dbnsfp",
        "dbsnp",
        "docm",
        "emv",
        "evs",
        "gwassnps",
        "mutdb",
        "snpeff",
    )
]

In [4]:
# transform from jsonld doc to nquads doc
def nquads_transform(doc):
    """Serialize a JSON-LD document to N-Quads and return its default graph.

    The document is converted with ``jsonld.to_rdf`` using the
    ``'application/nquads'`` format, the resulting text is parsed back with
    ``JsonLdProcessor.parse_nquads``, and only the ``'@default'`` entry of
    the parsed dataset is returned.

    :param doc: JSON-LD document (this notebook passes the result of
        ``mv.getvariant(..., jsonld=True)``).
    :return: the ``'@default'`` entry of the parsed dataset. The other
        helpers in this notebook iterate it as dicts with ``'subject'``,
        ``'predicate'`` and ``'object'`` entries, each holding a ``'value'``.
    """
    processor = jsonld.JsonLdProcessor()
    serialized = jsonld.to_rdf(doc, {'format': 'application/nquads'})
    dataset = processor.parse_nquads(serialized)
    return dataset['@default']

In [5]:
# get value and node
def get_value_and_node(nquads, uri):
    """Collect the subjects and objects of every quad whose predicate is ``uri``.

    :param nquads: iterable of quad dicts with ``'subject'``, ``'predicate'``
        and ``'object'`` entries, each holding a ``'value'``.
    :param uri: predicate URI to match exactly.
    :return: ``(node_list, value_list)`` -- two parallel lists, in input
        order, holding the subject values and the object values of the
        matching quads. Both are empty when nothing matches.
    """
    matching = [quad for quad in nquads if quad['predicate']['value'] == uri]
    subjects = [quad['subject']['value'] for quad in matching]
    objects = [quad['object']['value'] for quad in matching]
    return (subjects, objects)

In [6]:
def find_top_level_uri(nquads_id, nquads, _ancestors=frozenset()):
    """Walk up the graph from a node to the top-level data source above it.

    Looks for quads whose object is ``nquads_id``. When such a quad's
    predicate is listed in ``top_level_uris`` that predicate is the answer;
    otherwise the search continues from the quad's subject.

    :param nquads_id: value of the node (or object) to start from.
    :param nquads: sequence of quad dicts with ``'subject'``, ``'predicate'``
        and ``'object'`` entries, each holding a ``'value'``. It is iterated
        once per recursion level, so it must be re-iterable.
    :param _ancestors: internal; ids already on the current upward path,
        used to stop on cyclic references. Callers should not pass it.
    :return: the matching URI from ``top_level_uris``, or ``None`` when no
        upward path reaches a top-level predicate. When several incoming
        quads resolve, the one appearing last in ``nquads`` wins.
    """
    on_path = _ancestors | {nquads_id}
    uri = None  # stays None when nothing points at nquads_id or every path dead-ends
    for item in nquads:
        if item['object']['value'] != nquads_id:
            continue
        predicate = item['predicate']['value']
        if predicate in top_level_uris:
            uri = predicate
            continue
        parent = item['subject']['value']
        if parent in on_path:
            # The parent is already being resolved further down this path:
            # following it again would recurse forever.
            continue
        candidate = find_top_level_uri(parent, nquads, on_path)
        if candidate is not None:
            # Only a resolved branch may overwrite an earlier result.
            uri = candidate
    return uri

In [7]:
def fetch_value_source_for_variant(_id, uri):
    """List every value stored under ``uri`` for a variant, with its source.

    Fetches the variant as JSON-LD through the ``mv`` client, converts it
    with ``nquads_transform``, selects the quads whose predicate is ``uri``
    and resolves each hit to a top-level data source URI with
    ``find_top_level_uri``.

    :param _id: variant id passed to ``mv.getvariant``.
    :param uri: predicate URI identifying the kind of value to look up.
    :return: list of ``'<value> <top-level source URI>'`` strings, one per
        matching quad.
    """
    quads = nquads_transform(mv.getvariant(_id, jsonld=True))
    subjects, values = get_value_and_node(quads, uri)
    origins = [find_top_level_uri(subject, quads) for subject in subjects]
    return [found + ' ' + origin for found, origin in zip(values, origins)]

In [8]:
# Making Data-structure Neutral Queries by URI:
# ask for every value stored under the OMIM identifier URI without spelling
# out a source-specific field path; each result reads "<value> <source URI>".
fetch_value_source_for_variant('chr9:g.135781006_135781007del',
                               'http://identifiers.org/omim/')

['109800 http://schema.myvariant.info/datasource/clinvar']

In [9]:
# Data Discrepancy Check:
# the dbSNP identifier URI is looked up across the whole variant document, so
# data sources that report different rsids appear side by side in the result.
fetch_value_source_for_variant('chr12:g.111351981C>T',
                               'http://identifiers.org/dbsnp/')

['rs200387971 http://schema.myvariant.info/datasource/evs',
 'rs371405579 http://schema.myvariant.info/datasource/clinvar',
 'rs371405579 http://schema.myvariant.info/datasource/dbsnp']

In [10]:
# Normal query for the OMIM ID with the myvariant.info Python client:
# here the full, source-specific field path has to be given in `fields`.
mv.getvariant('chr9:g.135781006_135781007del', 
              fields='clinvar.rcv.conditions.identifiers.omim')

{'_id': 'chr9:g.135781006_135781007del',
 '_version': 1,
 'clinvar': {'rcv': {'conditions': {'identifiers': {'omim': '109800'}}}}}