Merge 1eb91b8 into 8dc9cd4

4dn-dcic · Nov 16, 2016 · a915035 · a915035
2 parents 8dc9cd4 + 1eb91b8
commit a915035
Show file tree

Hide file tree

Showing 14 changed files with 238 additions and 32 deletions.
diff --git a/Data_Files/Rao_et_al_2014/fieldsRao.xls b/Data_Files/Rao_et_al_2014/fieldsRao.xls
diff --git a/README.md b/README.md
@@ -119,3 +119,24 @@ you to update the release number, then tag the code with that version number
 and push it to github, which will trigger travis to build and test and if
 tests pass it will deploy to production version of pypi. Note that travis will
 automatically deploy the new version if you push a tag to git.
+
+# Pytest
+Every function is covered by a pytest test. The tests can be run from a terminal in the submit4dn folder with:
+
+    py.test
+
+Some tests need internet access and are labeled with the "webtest" mark.
+
+Some tests perform file operations and are labeled with the "file_operation" mark.
+
+To run only the marked tests, or to exclude them from a run, you can use the following commands:
+
+    # Run all tests
+    py.test
+
+    # Run only webtest
+    py.test -m webtest
+
+    # Run only tests with the file_operation mark
+    py.test -m file_operation
+
diff --git a/tests/data_files/Document_insert.xls b/tests/data_files/Document_insert.xls
diff --git a/tests/data_files/Exp_HiC_insert.xls b/tests/data_files/Exp_HiC_insert.xls
diff --git a/tests/data_files/Vender_ordered_reference.xls b/tests/data_files/Vender_ordered_reference.xls
diff --git a/tests/data_files/Vendor.xls b/tests/data_files/Vendor.xls
diff --git a/tests/data_files/Vendor_insert.xls b/tests/data_files/Vendor_insert.xls
diff --git a/tests/data_files/Vendor_ordered reference.xls b/tests/data_files/Vendor_ordered reference.xls
diff --git a/tests/data_files/example.fastq.gz b/tests/data_files/example.fastq.gz
diff --git a/tests/test_fdnDCIC.py b/tests/test_fdnDCIC.py
@@ -1,5 +1,6 @@
 import wranglertools.fdnDCIC as fdnDCIC
 import json
+import pytest
 # test data is in conftest.py
 
 keypairs = {
@@ -23,6 +24,7 @@ def test_key():
     assert isinstance(key.authid, str)
 
 
+@pytest.mark.file_operation
 def test_key_file():
     key = fdnDCIC.FDN_Key('./tests/data_files/keypairs.json', "default")
     assert(key)
@@ -57,11 +59,13 @@ def test_FDN_url():
         assert t_url == expected_url[n]
 
 
+@pytest.mark.file_operation
 def test_md5():
     md5_keypairs = fdnDCIC.md5('./tests/data_files/keypairs.json')
     assert md5_keypairs == "19d43267b642fe1868e3c136a2ee06f2"
 
 
+@pytest.mark.webtest
 def test_get_FDN(connection_public):
     # test the schema retrival with public connection
     award_schema = fdnDCIC.get_FDN("/profiles/award.json", connection_public, frame="object")
@@ -209,16 +213,41 @@ def test_fetch_all_items_mock(connection, mocker, returned_vendor_items):
             assert vendor[0].startswith("#")
 
 
+def xls_to_list(xls_file, sheet):
+    """Return the named sheet of an xls file as a list of rows.
+
+    Every row is a list holding str() of each xlrd cell object in that row.
+    """
+    import xlrd
+    workbook = xlrd.open_workbook(xls_file)
+    worksheet = workbook.sheet_by_name(sheet)
+    n_rows = worksheet.nrows
+    n_cols = worksheet.ncols
+    # outer comprehension walks the rows, inner one walks the columns of a row
+    return [
+        [str(worksheet.cell(r, c)) for c in range(n_cols)]
+        for r in range(n_rows)
+    ]
+
+
+@pytest.mark.file_operation
 def test_order_FDN_mock(connection, mocker, returned_vendor_items):
+    vendor_file = './tests/data_files/Vendor.xls'
+    ordered_file = './tests/data_files/Vendor_ordered.xls'
+    ref_file = './tests/data_files/Vendor_ordered reference.xls'
     import os
     try:
-        os.remove("./tests/data_files/Vendor_ordered.xls")
-    except:
+        os.remove(ordered_file)
+    except OSError:
         pass
+
     with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_items):
-        fdnDCIC.order_FDN('./tests/data_files/Vendor.xls', connection)
-        assert os.path.isfile('./tests/data_files/Vendor_ordered.xls')
+        fdnDCIC.order_FDN(vendor_file, connection)
+        assert os.path.isfile(ordered_file)
+    ord_list = xls_to_list(ordered_file, "Vendor")
+    ref_list = xls_to_list(ref_file, "Vendor")
+    assert ord_list == ref_list
     try:
-        os.remove("./tests/data_files/Vendor_ordered.xls")
-    except:
+        os.remove(ordered_file)
+    except OSError:
         pass
diff --git a/tests/test_get_field_info.py b/tests/test_get_field_info.py
@@ -1,4 +1,5 @@
 import wranglertools.get_field_info as gfi
+import pytest
 
 # test data is in conftest.py
 
@@ -93,11 +94,6 @@ def test_build_field_list_embeds_with_dots(embed_properties):
     assert field_list[1].name.startswith('experiment_relation')
 
 
-def test_get_uploadable_fields(connection_public):
-    field_dict = gfi.get_uploadable_fields(connection_public, ['Vendor'])
-    assert field_dict
-
-
 def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema):
     with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_schema):
         field_dict = gfi.get_uploadable_fields(connection, ['Vendor'])
@@ -109,8 +105,9 @@ def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema):
             assert field.enum is not None
 
 
+@pytest.mark.file_operation
 def test_create_xls(connection, mocker, returned_vendor_schema):
-    xls_file = "./tests/data_files/Vendor_ordered.xls"
+    xls_file = "./tests/data_files/Vendor_gfi_test.xls"
     import os
     try:
         os.remove(xls_file)

diff --git a/tests/test_import_data.py b/tests/test_import_data.py
@@ -3,6 +3,7 @@
 # test data is in conftest.py
 
 
+@pytest.mark.file_operation
 def test_attachment_image():
     attach = imp.attachment("./tests/data_files/test.jpg")
     assert attach['height'] == 1080
@@ -12,25 +13,29 @@ def test_attachment_image():
     assert attach['href'].startswith('data:image/jpeg;base64')
 
 
+@pytest.mark.file_operation
 def test_attachment_pdf():
     attach = imp.attachment("./tests/data_files/test.pdf")
     assert attach['download'] == 'test.pdf'
     assert attach['type'] == 'application/pdf'
     assert attach['href'].startswith('data:application/pdf;base64')
 
 
+@pytest.mark.file_operation
 def test_attachment_image_wrong_extension():
     with pytest.raises(ValueError) as excinfo:
         imp.attachment("./tests/data_files/test_jpeg.tiff")
     assert str(excinfo.value) == 'Wrong extension for image/jpeg: test_jpeg.tiff'
 
 
+@pytest.mark.file_operation
 def test_attachment_text_wrong_extension():
     with pytest.raises(ValueError) as excinfo:
         imp.attachment("./tests/data_files/test_txt.pdf")
     assert str(excinfo.value) == 'Wrong extension for text/plain: test_txt.pdf'
 
 
+@pytest.mark.webtest
 def test_attachment_url():
     import os
     attach = imp.attachment("https://wordpress.org/plugins/about/readme.txt")
@@ -43,34 +48,38 @@ def test_attachment_url():
         pass
 
 
+@pytest.mark.file_operation
 def test_attachment_not_accepted():
     with pytest.raises(ValueError) as excinfo:
         imp.attachment("./tests/data_files/test.mp3")
     assert str(excinfo.value) == 'Unknown file type for test.mp3'
 
 
+@pytest.mark.file_operation
 def test_reader(vendor_raw_xls_fields):
     readxls = imp.reader('./tests/data_files/Vendor.xls')
     for n, row in enumerate(readxls):
         assert row == vendor_raw_xls_fields[n]
 
 
+@pytest.mark.file_operation
 def test_reader_with_sheetname(vendor_raw_xls_fields):
     readxls = imp.reader('./tests/data_files/Vendor.xls', 'Vendor')
     for n, row in enumerate(readxls):
         assert row == vendor_raw_xls_fields[n]
 
 
+@pytest.mark.file_operation
 def test_reader_wrong_sheetname():
     readxls = imp.reader('./tests/data_files/Vendor.xls', 'Enzyme')
     list_readxls = list(readxls)
     assert list_readxls == []
 
 
+@pytest.mark.file_operation
 def test_cell_value():
     readxls = imp.reader('./tests/data_files/test_cell_values.xls')
     list_readxls = list(readxls)
-    print(list_readxls)
     assert list_readxls == [['BOOLEAN', '1'], ['NUMBER', '10'], ['DATE', '2016-09-02']]
 
 
@@ -89,6 +98,16 @@ def test_formatter_gets_lists_correctly():
     assert ['1', '2', '3'] == imp.data_formatter("'[1,2,3]'", 'array')
 
 
+def test_build_field_empty_is_skipped():
+    """build_field must return None when its first or second argument is an empty string."""
+    empty_cases = (('some_field', ''), ('', 'some_data'))
+    for first_arg, second_arg in empty_cases:
+        assert imp.build_field(first_arg, second_arg, 'string') is None
+
+
+def test_build_field_old_stype_field():
+    """A 'some_field:int' name with None as third argument must give an int value."""
+    expected = {'some_field': 5}
+    assert imp.build_field('some_field:int', "5", None) == expected
+
+
 def test_build_patch_json_removes_empty_fields(file_metadata, file_metadata_type):
     post_json = imp.build_patch_json(file_metadata, file_metadata_type)
 
@@ -131,9 +150,150 @@ def test_get_fields_type():
 def test_get_existing_uuid(connection, mocker, returned_vendor_existing_item):
     post_jsons = [{'uuid': 'some_uuid'},
                   {'accession': 'some_accession'},
-                  {'aliases': ['some_uuid']},
+                  {'aliases': ['some_acc']},
                   {'@id': 'some_@id'}]
     for post_json in post_jsons:
         with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_existing_item):
             response = imp.get_existing(post_json, connection)
             assert response == returned_vendor_existing_item.json()
+
+
+@pytest.mark.file_operation
+def test_excel_reader_no_update_no_patchall_new_doc_with_attachment(capsys, mocker, connection):
+    # read the Document sheet with both boolean arguments False and check that the
+    # 'attachment' of the json handed to get_existing has a base64 jpeg data href
+    test_insert = './tests/data_files/Document_insert.xls'
+    dict_load = {}
+    # get_existing is mocked to return an empty dict
+    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
+        imp.excel_reader(test_insert, 'Document', False, connection, False, dict_load)
+        args = imp.get_existing.call_args
+        # args[0][0] is the first positional argument of the get_existing call
+        attach = args[0][0]['attachment']
+        assert attach['href'].startswith('data:image/jpeg;base64')
+
+
+@pytest.mark.file_operation
+def test_excel_reader_no_update_no_patchall_new_item(capsys, mocker, connection):
+    # test new item submission without patchall update tags and check the return message
+    test_insert = './tests/data_files/Vendor_insert.xls'
+    dict_load = {}
+    message = "This looks like a new row but the update flag wasn't passed, use --update to post new data"
+    post_json = {'lab': 'sample-lab',
+                 'description': 'Sample description',
+                 'award': 'SampleAward',
+                 'title': 'Sample Vendor',
+                 'url': 'https://www.sample_vendor.com/',
+                 'aliases': ['dcic:sample_vendor']}
+    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
+        imp.excel_reader(test_insert, 'Vendor', False, connection, False, dict_load)
+        args = imp.get_existing.call_args
+        assert args[0][0] == post_json
+        out, err = capsys.readouterr()
+        assert out.strip() == message
+
+
+@pytest.mark.file_operation
+def test_excel_reader_no_update_no_patchall_existing_item(capsys, mocker, connection):
+    # test exisiting item submission without patchall update tags and check the return message
+    test_insert = "./tests/data_files/Vendor_insert.xls"
+    dict_load = {}
+    message = "VENDOR: 0 out of 1 posted, 0 errors, 0 patched, 1 not patched (use --patchall to patch)."
+    post_json = {'lab': 'sample-lab',
+                 'description': 'Sample description',
+                 'award': 'SampleAward',
+                 'title': 'Sample Vendor',
+                 'url': 'https://www.sample_vendor.com/',
+                 'aliases': ['dcic:sample_vendor']}
+    existing_vendor = {'uuid': 'sample_uuid'}
+    with mocker.patch('wranglertools.import_data.get_existing', return_value=existing_vendor):
+        imp.excel_reader(test_insert, 'Vendor', False, connection, False, dict_load)
+        args = imp.get_existing.call_args
+        assert args[0][0] == post_json
+        out, err = capsys.readouterr()
+        assert out.strip() == message
+
+
+@pytest.mark.file_operation
+def test_excel_reader_no_update_no_patchall_new_experiment_expset_combined(mocker, connection):
+    # check if the separated exp set fields in experiments get combined.
+    test_insert = './tests/data_files/Exp_HiC_insert.xls'
+    dict_load = {}
+    post_json = {'experiment_sets': ['a', 'b', 'c', 'd'], 'aliases': ['dcic:test'], 'award': 'test-award',
+                 'experiment_type': 'in situ Hi-C', 'lab': 'test-lab', 'filename': 'example.fastq.gz',
+                 'biosample': 'test-biosample'}
+    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
+        imp.excel_reader(test_insert, 'ExperimentHiC', False, connection, False, dict_load)
+        args = imp.get_existing.call_args
+        assert args[0][0] == post_json
+
+
+@pytest.mark.file_operation
+def test_excel_reader_update_new_experiment_post_and_file_upload(capsys, mocker, connection):
+    # check if the separated exp set fields in experiments get combined
+    test_insert = './tests/data_files/Exp_HiC_insert.xls'
+    dict_load = {}
+    message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz"
+    message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 0 patched."
+    e = {'status': 'success', '@graph': [{'uuid': 'some_uuid'}]}
+    # mock fetching existing info, return None
+    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
+        # mock upload file and skip
+        with mocker.patch('wranglertools.import_data.upload_file', return_value={}):
+            # mock posting new items
+            with mocker.patch('wranglertools.fdnDCIC.new_FDN', return_value=e):
+                imp.excel_reader(test_insert, 'ExperimentHiC', True, connection, False, dict_load)
+                args = imp.fdnDCIC.new_FDN.call_args
+                out, err = capsys.readouterr()
+                outlist = [i.strip() for i in out.split('\n') if i is not ""]
+                post_json_arg = args[0][2]
+                assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b'
+                assert message0 == outlist[0]
+                assert message1 == outlist[1]
+
+
+@pytest.mark.file_operation
+def test_excel_reader_patch_experiment_post_and_file_upload(capsys, mocker, connection):
+    # check if the separated exp set fields in experiments get combined
+    test_insert = './tests/data_files/Exp_HiC_insert.xls'
+    dict_load = {}
+    message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz"
+    message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 1 patched."
+    existing_exp = {'uuid': 'sample_uuid'}
+    e = {'status': 'success',
+         '@graph': [{'uuid': 'some_uuid',
+                     'upload_credentials': 'old_creds',
+                     'accession': 'some_accession'}]}
+    # mock fetching existing info, return None
+    with mocker.patch('wranglertools.import_data.get_existing', return_value=existing_exp):
+        # mock upload file and skip
+        with mocker.patch('wranglertools.import_data.upload_file', return_value={}):
+            # mock posting new items
+            with mocker.patch('wranglertools.fdnDCIC.patch_FDN', return_value=e):
+                # mock get upload creds
+                with mocker.patch('wranglertools.import_data.get_upload_creds', return_value="new_creds"):
+                    imp.excel_reader(test_insert, 'ExperimentHiC', False, connection, True, dict_load)
+                    # check for md5sum
+                    args = imp.fdnDCIC.patch_FDN.call_args
+                    post_json_arg = args[0][2]
+                    assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b'
+                    # check for cred getting updated (from old_creds to new_creds)
+                    args_upload = imp.upload_file.call_args
+                    updated_post = args_upload[0][0]
+                    assert updated_post['@graph'][0]['upload_credentials'] == 'new_creds'
+                    # check for output message
+                    out, err = capsys.readouterr()
+                    outlist = [i.strip() for i in out.split('\n') if i is not ""]
+                    assert message0 == outlist[0]
+                    assert message1 == outlist[1]
+
+
+def test_order_sorter(capsys):
+    # order_sorter must return the known sheets in the expected order, leave out the
+    # unknown "Trouble" sheet and print two warning lines about it
+    test_list = ["ExperimentHiC", "BiosampleCellCulture", "Biosource", "Document", "Modification",
+                 "IndividualMouse", "Biosample", "Lab", "User", "Trouble"]
+    ordered_list = ['User', 'Lab', 'Document', 'IndividualMouse', 'Biosource', 'Modification',
+                    'BiosampleCellCulture', 'Biosample', 'ExperimentHiC']
+    message0 = "WARNING! Trouble sheet(s) are not loaded"
+    message1 = '''WARNING! Check the sheet names and the reference list "sheet_order"'''
+    assert ordered_list == imp.order_sorter(test_list)
+    out, err = capsys.readouterr()
+    # drop empty lines; compare by value, since identity of str objects is not guaranteed
+    outlist = [i.strip() for i in out.split('\n') if i != ""]
+    assert message0 == outlist[0]
+    assert message1 == outlist[1]
diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py
@@ -254,14 +254,18 @@ def fetch_all_items(sheet, field_list, connection):
                 field = field.replace("|3", "")
                 if field == "#Field Name:":
                     item_info.append("#")
-                # the attachment fields returns a dictionary
+                # the attachment field returns a dictionary
                 elif field == "attachment":
                     try:
                         item_info.append(item.get(field)['download'])
                     except:
                         item_info.append("")
                 else:
-                    item_info.append(item.get(field, ''))
+                    # when writing values, turn lists into a comma-separated string
+                    write_value = item.get(field, '')
+                    if isinstance(write_value, list):
+                        write_value = ','.join(write_value)
+                    item_info.append(write_value)
             all_items.append(item_info)
         return all_items
     else: