Merge pull request #215 from dianakolusheva/fix_tests

Fix tests
gyorilab · Apr 5, 2021 · 14378a8 · 14378a8
2 parents e315cea + 175053c
commit 14378a8
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 34 deletions.
diff --git a/emmaa/tests/test_reactome_prior.py b/emmaa/tests/test_reactome_prior.py
@@ -14,10 +14,10 @@ def test_rx_id_from_up_id():
     """
     test_cases = [('P01116', 'R-HSA-9653079'),   # KRAS
                   ('P04637', 'R-HSA-69507'),   # TP53
-                  ('Q13485', 'R-HSA-2187323')]  # SMAD4
+                  ('Q13485', 'R-HSA-177103')]  # SMAD4
     for up_id, rx_id in test_cases:
         all_rx_ids = rx_id_from_up_id(up_id)
-        assert rx_id in all_rx_ids
+        assert rx_id in all_rx_ids, (up_id, all_rx_ids)
 
 
 def test_get_pathways_containing_genes():

diff --git a/emmaa/tests/test_xdd.py b/emmaa/tests/test_xdd.py
@@ -4,7 +4,7 @@
 
 @attr('nonpublic')
 def test_document_figures_doi():
-    doi = '10.1101/2020.08.23.20180281'
+    doi = '10.1016/j.apsb.2020.09.003'
     fig_list = get_document_figures(doi, 'DOI')
     assert fig_list
     # Should be a list of tuples with title and image bytes
@@ -15,15 +15,15 @@ def test_document_figures_doi():
 @attr('notravis', 'nonpublic')
 def test_document_figures_other_types():
     # Should get results from different paper ID types
-    trid = 31859624
+    trid = 32094555
     fig_list = get_document_figures(trid, 'TRID')
     assert fig_list
     assert len(fig_list[0]) == 2
-    pmid = '32838361'
+    pmid = '32923317'
     fig_list = get_document_figures(pmid, 'PMID')
     assert fig_list
     assert len(fig_list[0]) == 2
-    pmcid = 'PMC7362813'
+    pmcid = 'PMC7476560'
     fig_list = get_document_figures(pmcid, 'PMCID')
     assert fig_list
     assert len(fig_list[0]) == 2

diff --git a/emmaa/xdd/xdd_client.py b/emmaa/xdd/xdd_client.py
@@ -7,7 +7,6 @@
 logger = logging.getLogger(__name__)
 api_key = os.environ.get('XDD_API_KEY')
 doc_url = 'https://xdddev.chtc.io/sets/xdd-covid-19/cosmos/api/document'
-obj_url = 'https://xdddev.chtc.io/sets/xdd-covid-19/cosmos/api/object/'
 query_url = 'https://xdd.wisc.edu/sets/xdd-covid-19/cosmos/api/search'
 
 
@@ -29,21 +28,7 @@ def get_document_objects(doi):
             logger.warning(f'Did not get results for {doi} page {page}')
             break
         objects += rj['objects']
-    filtered_objects = [
-        obj for obj in objects if obj['cls'] in ['Figure', 'Table']]
-    return filtered_objects
-
-
-def get_figure_from_document_object(obj_dict):
-    """Get a figure title and bytes content from figure object dictionary."""
-    txt = obj_dict['header_content']
-    url = f"{obj_url}{obj_dict['id']}"
-    res = requests.get(url, {'api_key': api_key})
-    rj = res.json()
-    if 'objects' not in rj:
-        return txt, None
-    b = rj['objects'][0]['children'][0]['bytes']
-    return txt, b
+    return objects
 
 
 def get_document_figures(paper_id, paper_id_type):
@@ -81,9 +66,7 @@ def get_document_figures(paper_id, paper_id_type):
     objects = get_document_objects(doi)
     if not objects:
         return []
-    figures = []
-    for obj in objects:
-        figures.append(get_figure_from_document_object(obj))
+    figures = get_figures_from_objects(objects)
     logger.info(f'Returning {len(figures)} figures and tables.')
     return figures
 
@@ -116,14 +99,14 @@ def get_figures_from_query(query, limit=None):
     # If there is a limit on the number of figures, we can stop when we reach
     # it or when we run out of objects
     if limit:
-        figures = get_figures_from_query_objects(objects)
+        figures = get_figures_from_objects(objects, True)
         while len(figures) < limit and len(objects) < total:
             page += 1
             rj = send_query_search_request(query, page)
             if not rj:
                 logger.warning(f'Did not get results for {query}, page {page}')
                 break
-            new_figures = get_figures_from_query_objects(rj['objects'])
+            new_figures = get_figures_from_objects(rj['objects'], True)
             figures += new_figures
             objects += rj['objects']
         figures = figures[: limit]
@@ -137,7 +120,7 @@ def get_figures_from_query(query, limit=None):
             logger.warning(f'Did not get results for {query} page {page}')
             break
         objects += rj['objects']
-    figures = get_figures_from_query_objects(objects)
+    figures = get_figures_from_objects(objects, True)
     logger.info(f'Returning {len(figures)} figures and tables.')
     return figures
 
@@ -174,17 +157,20 @@ def send_document_search_request(doi, page):
                         {'doi': doi, 'api_key': api_key, 'page': page})
 
 
-def get_figures_from_query_objects(objects):
+def get_figures_from_objects(objects, paper_links=False):
     """Get a list of paper links, figure titles and their content bytes from
-    a list of object dictionaries (returned from query api)."""
+    a list of object dictionaries (returned from query or document api)."""
     figures = []
     for obj in objects:
         for child in obj['children']:
             if child['cls'] in ['Figure', 'Table']:
                 txt = child['header_content']
                 b = child['bytes']
-                urls = set()
-                for link in obj['bibjson']['link']:
-                    urls.add(link['url'])
-                figures.append((urls, txt, b))
+                if paper_links:
+                    urls = set()
+                    for link in obj['bibjson']['link']:
+                        urls.add(link['url'])
+                    figures.append((urls, txt, b))
+                else:
+                    figures.append((txt, b))
     return figures