Skip to content

Commit

Permalink
build APIDumper
Browse files Browse the repository at this point in the history
and rename minor items
  • Loading branch information
zcqian committed Jan 4, 2022
1 parent 3a436a1 commit 93df315
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
3 changes: 3 additions & 0 deletions src/hub/dataload/sources/unii/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .dumper import MyGeneUNIIDumper

# TODO: Implement uploader, mapping, and metadata
45 changes: 35 additions & 10 deletions src/hub/dataload/sources/unii/dumper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import os
from collections import defaultdict

import requests
from biothings_client import get_client
from collections import defaultdict

from biothings.hub.dataload.dumper import APIDumper
from config import DATA_ARCHIVE_ROOT

try:
from biothings import config
Expand All @@ -11,23 +15,45 @@
logger = logging.getLogger(__name__)


GENE_CLIENT = get_client('gene')
__all__ = [
'MyGeneUNIIDumper',
]


class MyGeneUNIIDumper(APIDumper):
    """API-based dumper for the ``mygene_unii`` data source.

    The release string is read from the openFDA substance endpoint, and the
    documents to dump are the records produced by ``_load_unii()``.
    """

    SRC_NAME = 'mygene_unii'
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)

    @staticmethod
    def get_release():
        """Return the ``meta.last_updated`` value reported by openFDA.

        The request uses ``limit=0`` and only the ``meta`` section of the
        JSON response is read.

        :return: the ``last_updated`` value from the response metadata
        :raises requests.HTTPError: if the endpoint answers with an error
            status, instead of failing later on a missing ``meta`` key
        """
        resp = requests.get(
            'https://api.fda.gov/other/substance.json?limit=0',
            timeout=15
        )
        # Surface HTTP failures explicitly rather than as an opaque KeyError
        # or JSON decoding error further down.
        resp.raise_for_status()
        return resp.json()['meta']['last_updated']

    @staticmethod
    def get_document():
        """Yield ``(filename, document)`` pairs for every UNII record.

        Every record from ``_load_unii()`` is paired with the same target
        file name, ``gene_unii.ndjson``.
        """
        for doc in _load_unii():
            yield 'gene_unii.ndjson', doc


def _query_uniprot(uniprot: list):
    """Look up mygene.info gene ``_id`` values for a list of UniProt codes.

    A ``biothings_client`` gene client is created on each call and a single
    ``querymany`` request is issued with ``scopes='uniprot'``.

    :param uniprot: list of UniProt codes to query
    :return: a two-item list ``[hits, raw]``; ``hits`` is a
        ``defaultdict(list)`` mapping each query term to the list of matched
        ``_id`` values, and ``raw`` is the unmodified ``querymany`` result
        (requested with ``returnall=True``)
    """
    gene_client = get_client('gene')
    res = gene_client.querymany(uniprot, scopes='uniprot', fields='_id', returnall=True)
    new_res = defaultdict(list)
    for item in res['out']:
        # Entries carrying a "notfound" key are skipped; the caller reads
        # unmatched terms from the raw result instead.
        if "notfound" not in item:
            new_res[item['query']].append(item['_id'])
    return [new_res, res]


def get_uniprot():
def _get_uniprot():
"""Requests the fda api to return uniprot: unii dictionary
"""
Expand Down Expand Up @@ -60,9 +86,10 @@ def get_uniprot():
return doc


def load_unii():
docs = get_uniprot()
ids = query_uniprot(list(docs.keys()))
def _load_unii():
# FIXME: correctly handle all timeouts
docs = _get_uniprot()
ids = _query_uniprot(list(docs.keys()))
logger.info("This is the number of missing uniprot to gene_id: %d", len(ids[1]['missing']))
logger.debug("This is the list of missing uniprot to gene_id: %s", ids[1]['missing'])
for prot, unii in docs.items():
Expand All @@ -73,5 +100,3 @@ def load_unii():
"unii": unii
}
yield rec


Empty file.

0 comments on commit 93df315

Please sign in to comment.