Skip to content

Commit

Permalink
Merge pull request #215 from dianakolusheva/fix_tests
Browse files (browse the repository at this point in the history)
Fix tests
  • Loading branch information
bgyori committed Apr 5, 2021
2 parents e315cea + 175053c commit 14378a8
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 34 deletions.
4 changes: 2 additions & 2 deletions emmaa/tests/test_reactome_prior.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ def test_rx_id_from_up_id():
"""
test_cases = [('P01116', 'R-HSA-9653079'), # KRAS
('P04637', 'R-HSA-69507'), # TP53
('Q13485', 'R-HSA-2187323')] # SMAD4
('Q13485', 'R-HSA-177103')] # SMAD4
for up_id, rx_id in test_cases:
all_rx_ids = rx_id_from_up_id(up_id)
assert rx_id in all_rx_ids
assert rx_id in all_rx_ids, (up_id, all_rx_ids)


def test_get_pathways_containing_genes():
Expand Down
8 changes: 4 additions & 4 deletions emmaa/tests/test_xdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@attr('nonpublic')
def test_document_figures_doi():
doi = '10.1101/2020.08.23.20180281'
doi = '10.1016/j.apsb.2020.09.003'
fig_list = get_document_figures(doi, 'DOI')
assert fig_list
# Should be a list of tuples with title and image bytes
Expand All @@ -15,15 +15,15 @@ def test_document_figures_doi():
@attr('notravis', 'nonpublic')
def test_document_figures_other_types():
# Should get results from different paper ID types
trid = 31859624
trid = 32094555
fig_list = get_document_figures(trid, 'TRID')
assert fig_list
assert len(fig_list[0]) == 2
pmid = '32838361'
pmid = '32923317'
fig_list = get_document_figures(pmid, 'PMID')
assert fig_list
assert len(fig_list[0]) == 2
pmcid = 'PMC7362813'
pmcid = 'PMC7476560'
fig_list = get_document_figures(pmcid, 'PMCID')
assert fig_list
assert len(fig_list[0]) == 2
Expand Down
42 changes: 14 additions & 28 deletions emmaa/xdd/xdd_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
logger = logging.getLogger(__name__)
api_key = os.environ.get('XDD_API_KEY')
doc_url = 'https://xdddev.chtc.io/sets/xdd-covid-19/cosmos/api/document'
obj_url = 'https://xdddev.chtc.io/sets/xdd-covid-19/cosmos/api/object/'
query_url = 'https://xdd.wisc.edu/sets/xdd-covid-19/cosmos/api/search'


Expand All @@ -29,21 +28,7 @@ def get_document_objects(doi):
logger.warning(f'Did not get results for {doi} page {page}')
break
objects += rj['objects']
filtered_objects = [
obj for obj in objects if obj['cls'] in ['Figure', 'Table']]
return filtered_objects


def get_figure_from_document_object(obj_dict):
"""Get a figure title and bytes content from figure object dictionary."""
txt = obj_dict['header_content']
url = f"{obj_url}{obj_dict['id']}"
res = requests.get(url, {'api_key': api_key})
rj = res.json()
if 'objects' not in rj:
return txt, None
b = rj['objects'][0]['children'][0]['bytes']
return txt, b
return objects


def get_document_figures(paper_id, paper_id_type):
Expand Down Expand Up @@ -81,9 +66,7 @@ def get_document_figures(paper_id, paper_id_type):
objects = get_document_objects(doi)
if not objects:
return []
figures = []
for obj in objects:
figures.append(get_figure_from_document_object(obj))
figures = get_figures_from_objects(objects)
logger.info(f'Returning {len(figures)} figures and tables.')
return figures

Expand Down Expand Up @@ -116,14 +99,14 @@ def get_figures_from_query(query, limit=None):
# If there's a limit on the number of figures, we can stop when we reach it
# or when we run out of objects
if limit:
figures = get_figures_from_query_objects(objects)
figures = get_figures_from_objects(objects, True)
while len(figures) < limit and len(objects) < total:
page += 1
rj = send_query_search_request(query, page)
if not rj:
logger.warning(f'Did not get results for {query}, page {page}')
break
new_figures = get_figures_from_query_objects(rj['objects'])
new_figures = get_figures_from_objects(rj['objects'], True)
figures += new_figures
objects += rj['objects']
figures = figures[: limit]
Expand All @@ -137,7 +120,7 @@ def get_figures_from_query(query, limit=None):
logger.warning(f'Did not get results for {query} page {page}')
break
objects += rj['objects']
figures = get_figures_from_query_objects(objects)
figures = get_figures_from_objects(objects, True)
logger.info(f'Returning {len(figures)} figures and tables.')
return figures

Expand Down Expand Up @@ -174,17 +157,20 @@ def send_document_search_request(doi, page):
{'doi': doi, 'api_key': api_key, 'page': page})


def get_figures_from_query_objects(objects):
def get_figures_from_objects(objects, paper_links=False):
"""Get a list of paper links, figure titles and their content bytes from
a list of object dictionaries (returned from query api)."""
a list of object dictionaries (returned from query or document api)."""
figures = []
for obj in objects:
for child in obj['children']:
if child['cls'] in ['Figure', 'Table']:
txt = child['header_content']
b = child['bytes']
urls = set()
for link in obj['bibjson']['link']:
urls.add(link['url'])
figures.append((urls, txt, b))
if paper_links:
urls = set()
for link in obj['bibjson']['link']:
urls.add(link['url'])
figures.append((urls, txt, b))
else:
figures.append((txt, b))
return figures

0 comments on commit 14378a8

Please sign in to comment.