diff --git a/src/encoded/submit.py b/src/encoded/submit.py index 2449b68411..f85161eef0 100644 --- a/src/encoded/submit.py +++ b/src/encoded/submit.py @@ -110,7 +110,7 @@ def xls_to_json(xls_data, project, institution): specimen_ids = {} for row in rows: indiv_alias = '{}:individual-{}'.format(project['name'], row['individual id']) - fam_alias = '{}:family-{}'.format(project['name'], row['individual id']) + fam_alias = '{}:family-{}'.format(project['name'], row['family id']) # sp_alias = '{}:sampleproc-{}'.format(project['name'], row['specimen id']) # create items for Individual items = fetch_individual_metadata(row, items, indiv_alias, institution['name']) @@ -192,9 +192,9 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f 'specimen_notes', 'research_protocol_name', 'sent_by', 'physician_id', 'indication' ] info = map_fields(row, info, fields, 'sample') - if info['specimen_accepted'].lower() == 'y': + if info.get('specimen_accepted', '').lower() == 'y': info['specimen_accepted'] = 'Yes' - elif info['specimen_accepted'].lower() == 'n': + elif info.get('specimen_accepted', '').lower() == 'n': info['specimen_accepted'] = 'No' if row.get('second specimen id'): other_id = {'id': row['second specimen id'], 'id_type': proj_name} # add proj info? @@ -202,7 +202,7 @@ def fetch_sample_metadata(row, items, indiv_alias, samp_alias, analysis_alias, f other_id['id_type'] = row['second specimen id type'] info['other_specimen_ids'] = [other_id] req_info = map_fields(row, {}, ['date sent', 'date completed'], 'requisition') - if req_info['accepted_rejected'].lower() in ['yes', 'no', 'y', 'n']: + if req_info.get('accepted_rejected', '').lower() in ['yes', 'no', 'y', 'n']: if req_info['accepted_rejected'].lower().startswith('y'): req_info['accepted_rejected'] = 'Accepted' else: diff --git a/src/encoded/tests/data/documents/cgap_submit_test.xlsx b/src/encoded/tests/data/documents/cgap_submit_test.xlsx index 0e3409a68f..de1a55de5f 100644 Binary files a/src/encoded/tests/data/documents/cgap_submit_test.xlsx and b/src/encoded/tests/data/documents/cgap_submit_test.xlsx differ diff --git a/src/encoded/tests/test_submit.py b/src/encoded/tests/test_submit.py index 4bf8515564..f4b14cde0a 100644 --- a/src/encoded/tests/test_submit.py +++ b/src/encoded/tests/test_submit.py @@ -8,7 +8,7 @@ @pytest.fixture def row_dict(): return { - 'patient id': '456', + 'individual id': '456', 'family id': '333', 'sex': 'M', 'relation to proband': 'proband', @@ -21,7 +21,10 @@ def row_dict(): @pytest.fixture def empty_items(): - return {'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}} + return { + 'individual': {}, 'family': {}, 'sample': {}, 'sample_processing': {}, + 'case': {}, 'report': {}, 'reports': [] + } @pytest.fixture @@ -57,19 +60,19 @@ def submission_info3(submission_info2): def test_fetch_individual_metadata_new(row_dict, empty_items): - items_out = fetch_individual_metadata(row_dict, empty_items, 'test-proj:indiv1') + items_out = fetch_individual_metadata(row_dict, empty_items, 'test-proj:indiv1', 'hms-dbmi') assert items_out['individual']['test-proj:indiv1']['aliases'] == ['test-proj:indiv1'] assert items_out['individual']['test-proj:indiv1']['individual_id'] == '456' -def test_fetch_individual_metadata_old(row_dict): +def test_fetch_individual_metadata_old(row_dict, empty_items): items = empty_items.copy() items['individual'] = {'test-proj:indiv1': { 'individual_id': '456', 'age': 46, 'aliases': ['test-proj:indiv1'] }} - items_out = fetch_individual_metadata(row_dict, items, 'test-proj:indiv1') + items_out = fetch_individual_metadata(row_dict, items, 'test-proj:indiv1', 'hms-dbmi') assert len(items['individual']) == len(items_out['individual']) assert 'sex' in items_out['individual']['test-proj:indiv1'] assert 'age' in items_out['individual']['test-proj:indiv1'] @@ -81,7 +84,7 @@ def test_fetch_family_metadata_new(row_dict, empty_items): assert items_out['family']['test-proj:fam1']['proband'] == 'test-proj:indiv1' -def test_fetch_family_metadata_old(row_dict): +def test_fetch_family_metadata_old(row_dict, empty_items): items = empty_items.copy() items['family'] = {'test-proj:fam1': { 'aliases': ['test-proj:fam1'], @@ -95,41 +98,34 @@ def test_fetch_family_metadata_old(row_dict): assert items_out['family']['test-proj:fam1']['mother'] == 'test-proj:indiv2' -def test_fetch_sample_metadata_sp(row_dict): +def test_fetch_sample_metadata_sp(row_dict, empty_items): items = empty_items.copy() items['individual'] = {'test-proj:indiv1': {}} - items_out = fetch_sample_metadata(row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1') + items_out = fetch_sample_metadata( + row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1', 'test-proj:fam1', 'test-proj' + ) assert items_out['sample']['test-proj:samp1']['specimen_accession'] == row_dict['specimen id'] assert items_out['sample_processing']['test-proj:sp1']['samples'] == ['test-proj:samp1'] assert items_out['individual']['test-proj:indiv1']['samples'] == ['test-proj:samp1'] -def test_fetch_sample_metadata_no_sp(row_dict): - items = empty_items.copy() - items['individual'] = {'test-proj:indiv1': {}} - row_dict['report required'] = 'N' - items_out = fetch_sample_metadata(row_dict, items, 'test-proj:indiv1', 'test-proj:samp1', 'test-proj:sp1') - assert items_out['sample']['test-proj:samp1']['specimen_accession'] == row_dict['specimen id'] - assert not items_out['sample_processing'] - - -def test_create_sample_processing_groups_grp(submission_info2): - items_out = create_sample_processing_groups(submission_info2, 'test-proj:sp-multi') - assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' - assert len(items_out['sample_processing']['test-proj:sp-multi']['samples']) == 2 - - -def test_create_sample_processing_groups_one(submission_info): - items_out = create_sample_processing_groups(submission_info, 'test-proj:sp-single') - assert not items_out['sample_processing'] - - -def test_create_sample_processing_groups_trio(submission_info3): - items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') - assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' - submission_info3['family']['test-proj:fam1']['father'] = 'test-proj:indiv3' - items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') - assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Trio' +# def test_create_sample_processing_groups_grp(submission_info2): +# items_out = create_sample_processing_groups(submission_info2, 'test-proj:sp-multi') +# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' +# assert len(items_out['sample_processing']['test-proj:sp-multi']['samples']) == 2 +# +# +# def test_create_sample_processing_groups_one(submission_info): +# items_out = create_sample_processing_groups(submission_info, 'test-proj:sp-single') +# assert not items_out['sample_processing'] +# +# +# def test_create_sample_processing_groups_trio(submission_info3): +# items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') +# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Group' +# submission_info3['family']['test-proj:fam1']['father'] = 'test-proj:indiv3' +# items_out = create_sample_processing_groups(submission_info3, 'test-proj:sp-multi') +# assert items_out['sample_processing']['test-proj:sp-multi']['analysis_type'] == 'WGS-Trio' def test_xls_to_json(project, institution):