Skip to content

Commit

Permalink
Merge 1eb91b8 into 8dc9cd4
Browse files Browse the repository at this point in the history
  • Loading branch information
KorayKirli committed Nov 16, 2016
2 parents 8dc9cd4 + 1eb91b8 commit a915035
Show file tree
Hide file tree
Showing 14 changed files with 238 additions and 32 deletions.
Binary file modified Data_Files/Rao_et_al_2014/fieldsRao.xls
Binary file not shown.
21 changes: 21 additions & 0 deletions README.md
Expand Up @@ -119,3 +119,24 @@ you to update the release number, then tag the code with that version number
and push it to github, which will trigger travis to build and test and if
tests pass it will deploy to production version of pypi. Note that travis will
automatically deploy the new version if you push a tag to git.

# Pytest
Every function is covered by pytest tests. They can be run from a terminal in the submit4dn folder with:

py.test

Some tests need internet access and are labeled with the "webtest" mark.

Some tests perform file operations and are labeled with the "file_operation" mark.

To run only the marked tests, or to exclude them from a test run, you can use the following commands:

# Run all tests
py.test

# Run only webtest
py.test -m webtest

# Run only tests with the file_operation mark
py.test -m file_operation

Binary file added tests/data_files/Document_insert.xls
Binary file not shown.
Binary file added tests/data_files/Exp_HiC_insert.xls
Binary file not shown.
Binary file removed tests/data_files/Vender_ordered_reference.xls
Binary file not shown.
Binary file modified tests/data_files/Vendor.xls
Binary file not shown.
Binary file added tests/data_files/Vendor_insert.xls
Binary file not shown.
Binary file added tests/data_files/Vendor_ordered reference.xls
Binary file not shown.
Binary file added tests/data_files/example.fastq.gz
Binary file not shown.
41 changes: 35 additions & 6 deletions tests/test_fdnDCIC.py
@@ -1,5 +1,6 @@
import wranglertools.fdnDCIC as fdnDCIC
import json
import pytest
# test data is in conftest.py

keypairs = {
Expand All @@ -23,6 +24,7 @@ def test_key():
assert isinstance(key.authid, str)


@pytest.mark.file_operation
def test_key_file():
key = fdnDCIC.FDN_Key('./tests/data_files/keypairs.json', "default")
assert(key)
Expand Down Expand Up @@ -57,11 +59,13 @@ def test_FDN_url():
assert t_url == expected_url[n]


@pytest.mark.file_operation
def test_md5():
    # fdnDCIC.md5 must reproduce the known digest of the keypairs fixture file
    expected_digest = "19d43267b642fe1868e3c136a2ee06f2"
    assert fdnDCIC.md5('./tests/data_files/keypairs.json') == expected_digest


@pytest.mark.webtest
def test_get_FDN(connection_public):
# test the schema retrieval with public connection
award_schema = fdnDCIC.get_FDN("/profiles/award.json", connection_public, frame="object")
Expand Down Expand Up @@ -209,16 +213,41 @@ def test_fetch_all_items_mock(connection, mocker, returned_vendor_items):
assert vendor[0].startswith("#")


def xls_to_list(xls_file, sheet):
    """Return the content of one worksheet as a list of rows.

    Args:
        xls_file: path of the workbook, opened with ``xlrd.open_workbook``.
        sheet: name of the worksheet to read.

    Returns:
        A list with one entry per row; each entry is a list holding
        ``str()`` of the object returned by ``sheet.cell(row, col)`` for
        every column of that row.
    """
    import xlrd
    worksheet = xlrd.open_workbook(xls_file).sheet_by_name(sheet)
    n_cols = worksheet.ncols
    n_rows = worksheet.nrows
    return [
        [str(worksheet.cell(r, c)) for c in range(n_cols)]
        for r in range(n_rows)
    ]


@pytest.mark.file_operation
def test_order_FDN_mock(connection, mocker, returned_vendor_items):
    # order_FDN must write Vendor_ordered.xls, and the "Vendor" sheet of that
    # file must equal the "Vendor" sheet of the stored reference workbook.
    import os
    vendor_file = './tests/data_files/Vendor.xls'
    ordered_file = './tests/data_files/Vendor_ordered.xls'
    ref_file = './tests/data_files/Vendor_ordered reference.xls'

    # start from a clean state; a missing output file is not an error
    try:
        os.remove(ordered_file)
    except OSError:
        pass

    with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_items):
        fdnDCIC.order_FDN(vendor_file, connection)
        assert os.path.isfile(ordered_file)
        ord_list = xls_to_list(ordered_file, "Vendor")
        ref_list = xls_to_list(ref_file, "Vendor")
        assert ord_list == ref_list

    # remove the generated file again; only file-system errors are ignored
    try:
        os.remove(ordered_file)
    except OSError:
        pass
9 changes: 3 additions & 6 deletions tests/test_get_field_info.py
@@ -1,4 +1,5 @@
import wranglertools.get_field_info as gfi
import pytest

# test data is in conftest.py

Expand Down Expand Up @@ -93,11 +94,6 @@ def test_build_field_list_embeds_with_dots(embed_properties):
assert field_list[1].name.startswith('experiment_relation')


def test_get_uploadable_fields(connection_public):
    # the public connection must yield a non-empty result for the Vendor sheet
    vendor_fields = gfi.get_uploadable_fields(connection_public, ['Vendor'])
    assert vendor_fields


def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema):
with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_schema):
field_dict = gfi.get_uploadable_fields(connection, ['Vendor'])
Expand All @@ -109,8 +105,9 @@ def test_get_uploadable_fields_mock(connection, mocker, returned_vendor_schema):
assert field.enum is not None


@pytest.mark.file_operation
def test_create_xls(connection, mocker, returned_vendor_schema):
xls_file = "./tests/data_files/Vendor_ordered.xls"
xls_file = "./tests/data_files/Vendor_gfi_test.xls"
import os
try:
os.remove(xls_file)
Expand Down
164 changes: 162 additions & 2 deletions tests/test_import_data.py
Expand Up @@ -3,6 +3,7 @@
# test data is in conftest.py


@pytest.mark.file_operation
def test_attachment_image():
attach = imp.attachment("./tests/data_files/test.jpg")
assert attach['height'] == 1080
Expand All @@ -12,25 +13,29 @@ def test_attachment_image():
assert attach['href'].startswith('data:image/jpeg;base64')


@pytest.mark.file_operation
def test_attachment_pdf():
    # check the 'download', 'type' and 'href' entries returned for a PDF file
    pdf_attachment = imp.attachment("./tests/data_files/test.pdf")
    assert pdf_attachment['download'] == 'test.pdf'
    assert pdf_attachment['type'] == 'application/pdf'
    assert pdf_attachment['href'].startswith('data:application/pdf;base64')


@pytest.mark.file_operation
def test_attachment_image_wrong_extension():
    # attachment() must reject test_jpeg.tiff with a "Wrong extension" ValueError
    expected_error = 'Wrong extension for image/jpeg: test_jpeg.tiff'
    with pytest.raises(ValueError) as excinfo:
        imp.attachment("./tests/data_files/test_jpeg.tiff")
    # checked after the with-block: statements placed after the raising call
    # inside the block would never execute
    assert str(excinfo.value) == expected_error


@pytest.mark.file_operation
def test_attachment_text_wrong_extension():
    # attachment() must reject test_txt.pdf with a "Wrong extension" ValueError
    expected_error = 'Wrong extension for text/plain: test_txt.pdf'
    with pytest.raises(ValueError) as excinfo:
        imp.attachment("./tests/data_files/test_txt.pdf")
    # checked after the with-block: statements placed after the raising call
    # inside the block would never execute
    assert str(excinfo.value) == expected_error


@pytest.mark.webtest
def test_attachment_url():
import os
attach = imp.attachment("https://wordpress.org/plugins/about/readme.txt")
Expand All @@ -43,34 +48,38 @@ def test_attachment_url():
pass


@pytest.mark.file_operation
def test_attachment_not_accepted():
    # attachment() must reject test.mp3 with an "Unknown file type" ValueError
    expected_error = 'Unknown file type for test.mp3'
    with pytest.raises(ValueError) as excinfo:
        imp.attachment("./tests/data_files/test.mp3")
    # checked after the with-block: statements placed after the raising call
    # inside the block would never execute
    assert str(excinfo.value) == expected_error


@pytest.mark.file_operation
def test_reader(vendor_raw_xls_fields):
    # every row read from Vendor.xls must equal the fixture row at the same index
    rows = imp.reader('./tests/data_files/Vendor.xls')
    for index, row in enumerate(rows):
        assert row == vendor_raw_xls_fields[index]


@pytest.mark.file_operation
def test_reader_with_sheetname(vendor_raw_xls_fields):
    # same check as the default read, but with the sheet name given explicitly
    rows = imp.reader('./tests/data_files/Vendor.xls', 'Vendor')
    for index, row in enumerate(rows):
        assert row == vendor_raw_xls_fields[index]


@pytest.mark.file_operation
def test_reader_wrong_sheetname():
    # asking Vendor.xls for the 'Enzyme' sheet must yield no rows at all
    rows = list(imp.reader('./tests/data_files/Vendor.xls', 'Enzyme'))
    assert rows == []


@pytest.mark.file_operation
def test_cell_value():
    # the reader must return the boolean, number and date cells of
    # test_cell_values.xls as the strings '1', '10' and '2016-09-02'
    readxls = imp.reader('./tests/data_files/test_cell_values.xls')
    list_readxls = list(readxls)
    assert list_readxls == [['BOOLEAN', '1'], ['NUMBER', '10'], ['DATE', '2016-09-02']]


Expand All @@ -89,6 +98,16 @@ def test_formatter_gets_lists_correctly():
assert ['1', '2', '3'] == imp.data_formatter("'[1,2,3]'", 'array')


def test_build_field_empty_is_skipped():
    # build_field must return None when either the data or the field name is empty
    for field_name, field_data in (('some_field', ''), ('', 'some_data')):
        assert imp.build_field(field_name, field_data, 'string') is None


def test_build_field_old_stype_field():
    # a 'name:int' field with no explicit type must yield the integer value
    expected = {'some_field': 5}
    assert imp.build_field('some_field:int', "5", None) == expected


def test_build_patch_json_removes_empty_fields(file_metadata, file_metadata_type):
post_json = imp.build_patch_json(file_metadata, file_metadata_type)

Expand Down Expand Up @@ -131,9 +150,150 @@ def test_get_fields_type():
def test_get_existing_uuid(connection, mocker, returned_vendor_existing_item):
    # get_existing must return the mocked response's JSON for each of these
    # identifier payloads
    identifier_jsons = (
        {'uuid': 'some_uuid'},
        {'accession': 'some_accession'},
        {'aliases': ['some_uuid']},
        {'aliases': ['some_acc']},
        {'@id': 'some_@id'},
    )
    for identifier_json in identifier_jsons:
        with mocker.patch('wranglertools.fdnDCIC.requests.get', return_value=returned_vendor_existing_item):
            found = imp.get_existing(identifier_json, connection)
            assert found == returned_vendor_existing_item.json()


@pytest.mark.file_operation
def test_excel_reader_no_update_no_patchall_new_doc_with_attachment(capsys, mocker, connection):
    # new Document row read with both boolean flags False: the json handed to the
    # mocked get_existing must carry the attachment as a jpeg data URI
    workbook = './tests/data_files/Document_insert.xls'
    loaded = {}
    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
        imp.excel_reader(workbook, 'Document', False, connection, False, loaded)
        # first positional argument of the last get_existing call
        sent_json = imp.get_existing.call_args[0][0]
        href = sent_json['attachment']['href']
        assert href.startswith('data:image/jpeg;base64')


@pytest.mark.file_operation
def test_excel_reader_no_update_no_patchall_new_item(capsys, mocker, connection):
    # new Vendor row read with both boolean flags False: check the json passed to
    # get_existing and the message printed to stdout
    workbook = './tests/data_files/Vendor_insert.xls'
    loaded = {}
    expected_message = "This looks like a new row but the update flag wasn't passed, use --update to post new data"
    expected_json = {
        'lab': 'sample-lab',
        'description': 'Sample description',
        'award': 'SampleAward',
        'title': 'Sample Vendor',
        'url': 'https://www.sample_vendor.com/',
        'aliases': ['dcic:sample_vendor'],
    }
    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
        imp.excel_reader(workbook, 'Vendor', False, connection, False, loaded)
        call_info = imp.get_existing.call_args
        # first positional argument of the last get_existing call
        assert call_info[0][0] == expected_json
        captured_out, _ = capsys.readouterr()
        assert captured_out.strip() == expected_message


@pytest.mark.file_operation
def test_excel_reader_no_update_no_patchall_existing_item(capsys, mocker, connection):
    # Vendor row for which get_existing reports an existing item, read with both
    # boolean flags False: check the json passed on and the summary message
    workbook = "./tests/data_files/Vendor_insert.xls"
    loaded = {}
    expected_message = "VENDOR: 0 out of 1 posted, 0 errors, 0 patched, 1 not patched (use --patchall to patch)."
    expected_json = {
        'lab': 'sample-lab',
        'description': 'Sample description',
        'award': 'SampleAward',
        'title': 'Sample Vendor',
        'url': 'https://www.sample_vendor.com/',
        'aliases': ['dcic:sample_vendor'],
    }
    found_item = {'uuid': 'sample_uuid'}
    with mocker.patch('wranglertools.import_data.get_existing', return_value=found_item):
        imp.excel_reader(workbook, 'Vendor', False, connection, False, loaded)
        call_info = imp.get_existing.call_args
        # first positional argument of the last get_existing call
        assert call_info[0][0] == expected_json
        captured_out, _ = capsys.readouterr()
        assert captured_out.strip() == expected_message


@pytest.mark.file_operation
def test_excel_reader_no_update_no_patchall_new_experiment_expset_combined(mocker, connection):
    # the separate experiment set columns of the ExperimentHiC sheet must arrive
    # at get_existing combined into one 'experiment_sets' list
    workbook = './tests/data_files/Exp_HiC_insert.xls'
    loaded = {}
    expected_json = {
        'experiment_sets': ['a', 'b', 'c', 'd'],
        'aliases': ['dcic:test'],
        'award': 'test-award',
        'experiment_type': 'in situ Hi-C',
        'lab': 'test-lab',
        'filename': 'example.fastq.gz',
        'biosample': 'test-biosample',
    }
    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
        imp.excel_reader(workbook, 'ExperimentHiC', False, connection, False, loaded)
        # first positional argument of the last get_existing call
        sent_json = imp.get_existing.call_args[0][0]
        assert sent_json == expected_json


@pytest.mark.file_operation
def test_excel_reader_update_new_experiment_post_and_file_upload(capsys, mocker, connection):
    # new ExperimentHiC row read with the first boolean flag True: the json handed
    # to the mocked new_FDN must contain the md5sum of the fastq file, and stdout
    # must hold the md5 progress line followed by the summary line
    test_insert = './tests/data_files/Exp_HiC_insert.xls'
    dict_load = {}
    message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz"
    message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 0 patched."
    e = {'status': 'success', '@graph': [{'uuid': 'some_uuid'}]}
    # mock fetching existing info, nothing is found (empty dict)
    with mocker.patch('wranglertools.import_data.get_existing', return_value={}):
        # mock upload file and skip
        with mocker.patch('wranglertools.import_data.upload_file', return_value={}):
            # mock posting new items
            with mocker.patch('wranglertools.fdnDCIC.new_FDN', return_value=e):
                imp.excel_reader(test_insert, 'ExperimentHiC', True, connection, False, dict_load)
                args = imp.fdnDCIC.new_FDN.call_args
                out, err = capsys.readouterr()
                # drop empty lines by value; `is not ""` compared object identity
                outlist = [line.strip() for line in out.split('\n') if line != ""]
                # third positional argument of the last new_FDN call
                post_json_arg = args[0][2]
                assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b'
                assert message0 == outlist[0]
                assert message1 == outlist[1]


@pytest.mark.file_operation
def test_excel_reader_patch_experiment_post_and_file_upload(capsys, mocker, connection):
    # ExperimentHiC row for which get_existing reports an existing item, read with
    # the second boolean flag True: check the md5sum sent to the mocked patch_FDN,
    # the refreshed upload credentials given to upload_file, and the stdout lines
    test_insert = './tests/data_files/Exp_HiC_insert.xls'
    dict_load = {}
    message0 = "calculating md5 sum for file ./tests/data_files/example.fastq.gz"
    message1 = "EXPERIMENTHIC: 1 out of 1 posted, 0 errors, 1 patched."
    existing_exp = {'uuid': 'sample_uuid'}
    e = {'status': 'success',
         '@graph': [{'uuid': 'some_uuid',
                     'upload_credentials': 'old_creds',
                     'accession': 'some_accession'}]}
    # mock fetching existing info, an existing item is returned
    with mocker.patch('wranglertools.import_data.get_existing', return_value=existing_exp):
        # mock upload file and skip
        with mocker.patch('wranglertools.import_data.upload_file', return_value={}):
            # mock patching the existing item
            with mocker.patch('wranglertools.fdnDCIC.patch_FDN', return_value=e):
                # mock get upload creds
                with mocker.patch('wranglertools.import_data.get_upload_creds', return_value="new_creds"):
                    imp.excel_reader(test_insert, 'ExperimentHiC', False, connection, True, dict_load)
                    # check for md5sum (third positional argument of patch_FDN)
                    args = imp.fdnDCIC.patch_FDN.call_args
                    post_json_arg = args[0][2]
                    assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b'
                    # check for cred getting updated (from old_creds to new_creds)
                    args_upload = imp.upload_file.call_args
                    updated_post = args_upload[0][0]
                    assert updated_post['@graph'][0]['upload_credentials'] == 'new_creds'
                    # check for output message
                    out, err = capsys.readouterr()
                    # drop empty lines by value; `is not ""` compared object identity
                    outlist = [line.strip() for line in out.split('\n') if line != ""]
                    assert message0 == outlist[0]
                    assert message1 == outlist[1]


def test_order_sorter(capsys):
    # order_sorter must return the known sheets in the expected order, leave out
    # the unknown "Trouble" sheet, and print two warning lines about it
    test_list = ["ExperimentHiC", "BiosampleCellCulture", "Biosource", "Document", "Modification",
                 "IndividualMouse", "Biosample", "Lab", "User", "Trouble"]
    ordered_list = ['User', 'Lab', 'Document', 'IndividualMouse', 'Biosource', 'Modification',
                    'BiosampleCellCulture', 'Biosample', 'ExperimentHiC']
    message0 = "WARNING! Trouble sheet(s) are not loaded"
    message1 = '''WARNING! Check the sheet names and the reference list "sheet_order"'''
    assert ordered_list == imp.order_sorter(test_list)
    out, err = capsys.readouterr()
    # drop empty lines by value; `is not ""` compared object identity
    outlist = [line.strip() for line in out.split('\n') if line != ""]
    assert message0 == outlist[0]
    assert message1 == outlist[1]
8 changes: 6 additions & 2 deletions wranglertools/fdnDCIC.py
Expand Up @@ -254,14 +254,18 @@ def fetch_all_items(sheet, field_list, connection):
field = field.replace("|3", "")
if field == "#Field Name:":
item_info.append("#")
# the attachment fields returns a dictionary
# the attachment field returns a dictionary
elif field == "attachment":
try:
item_info.append(item.get(field)['download'])
except:
item_info.append("")
else:
item_info.append(item.get(field, ''))
# when writing values, check for the lists and turn them into string
write_value = item.get(field, '')
if isinstance(write_value, list):
write_value = ','.join(write_value)
item_info.append(write_value)
all_items.append(item_info)
return all_items
else:
Expand Down

0 comments on commit a915035

Please sign in to comment.