diff --git a/ChangeLog.md b/ChangeLog.md
index 4242957d..4af2d8c5 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -19,6 +19,7 @@ Emperor 0.9.3-dev (changes since Emperor 0.9.2 go here)
 * Fixed problem where coordinate files with large values (greater than 100) would not be displayed on screen.
 * Fixed problem that prevented the user from scrolling through the categories in the user interface.
+* Fixed problem that removed unique/single-valued categories in the mapping file even if these were selected with `--color_by`.
 * Clean-up the layout of the user interface so it's cleaner and consistent.
 * Fix problem where long category names would alter the layout of the interface.
 * Fix inability to write an 'E' character in the Filename field when exporting an svg.
diff --git a/emperor/util.py b/emperor/util.py
index cd7fa06d..53171686 100644
--- a/emperor/util.py
+++ b/emperor/util.py
@@ -146,20 +146,23 @@ def preprocess_mapping_file(data, headers, columns, unique=False, single=False,
             line.append(''.join([line[index] for index in indices]))
         headers.append(new_column)
 
-    # remove all unique or singled valued columns
+    # remove all unique or singled valued columns that are not included in
+    # the list of categories that should be kept i. e. columns
     if unique or single:
         columns_to_remove = []
         metadata = MetadataMap(mapping_file_to_dict(data, headers), [])
 
         # find columns that have values that are all unique
-        if unique == True:
-            columns_to_remove += [column_name for column_name in headers[1::]
-                if metadata.hasUniqueCategoryValues(column_name)]
+        if unique:
+            for c in headers[1::]:
+                if metadata.hasUniqueCategoryValues(c) and c not in columns:
+                    columns_to_remove.append(c)
 
         # remove categories where there is only one value
-        if single == True:
-            columns_to_remove += [column_name for column_name in headers[1::]
-                if metadata.hasSingleCategoryValue(column_name)]
+        if single:
+            for c in headers[1::]:
+                if metadata.hasSingleCategoryValue(c) and c not in columns:
+                    columns_to_remove.append(c)
 
         columns_to_remove = list(set(columns_to_remove))
 
         # remove the single or unique columns
diff --git a/tests/test_util.py b/tests/test_util.py
index 209f05c1..9ad8d158 100755
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -121,8 +121,8 @@ def test_preprocess_mapping_file(self):
 
         # check it removes columns with unique values
         out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
-            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
-            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            self.mapping_file_headers, ['SampleID', 'LinkerPrimerSequence',
+            'Treatment', 'DOB'],
             unique=True)
         self.assertEquals(out_headers, ['SampleID', 'LinkerPrimerSequence',
             'Treatment', 'DOB'])
@@ -131,7 +131,7 @@ def test_preprocess_mapping_file(self):
         # check it removes columns where there is only one value
         out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
             self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
-            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            'Treatment', 'DOB', 'Description'],
             single=True)
         self.assertEquals(out_headers,['SampleID', 'BarcodeSequence',
             'Treatment', 'DOB', 'Description'])
@@ -151,6 +151,32 @@ def test_preprocess_mapping_file(self):
         self.assertEquals(out_data, MAPPING_FILE_DATA_DUPLICATED)
         self.assertEquals(out_headers, ['SampleID', 'Treatment', 'DOB'])
 
+        # check it doesn't remove columns because all are included in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            unique=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
+
+        # check it doesn't remove columns because all are included in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            single=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
+
+        # check it doesn't remove columns because all are included in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            unique=True, single=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
 
     def test_keep_columns_from_mapping_file(self):
         """Check correct selection of metadata is being done"""
@@ -438,6 +464,20 @@ def test_sanitize_mapping_file(self):
     ['PC.635', 'Fast', 'Fast20080116'],
     ['PC.636', 'Fast', 'Fast20080116']]
 
+MAPPING_FILE_DATA_CAT_G = [['PC.354', 'AGCACGAGCCTA', 'YATGCTGCCTCCCGTAGGAGT',
+'Control', '20061218', 'Control_mouse_I.D._354'], ['PC.355', 'AACTCGTCGATG',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061218', 'Control_mouse_I.D._355'],
+['PC.356', 'ACAGACCACTCA', 'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061126',
+'Control_mouse_I.D._356'], ['PC.481', 'ACCAGCGACTAG', 'YATGCTGCCTCCCGTAGGAGT',
+'Control', '20070314', 'Control_mouse_I.D._481'], ['PC.593', 'AGCAGCACTTGT',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20071210', 'Control_mouse_I.D._593'],
+['PC.607', 'AACTGTGCGTAC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20071112',
+'Fasting_mouse_I.D._607'], ['PC.634', 'ACAGAGTCGGCT', 'YATGCTGCCTCCCGTAGGAGT',
+'Fast', '20080116', 'Fasting_mouse_I.D._634'], ['PC.635', 'ACCGCAGAGTCA',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116', 'Fasting_mouse_I.D._635'],
+['PC.636', 'ACGGTGAGTGTC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116',
+'Fasting_mouse_I.D._636']]
+
 MAPPING_FILE_DATA_GRADIENT = [
     ['PC.354', 'Control','3', '40', 'Control20061218'],
     ['PC.355', 'Control','9', '44', 'Control20061218'],
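Note: the rule the patch enforces is that a category is dropped only when it is
unique- or single-valued AND it was not explicitly requested through `columns`
(i.e. the categories passed with `--color_by`). The sketch below restates that
rule as standalone Python for illustration only; it does not use Emperor's
`MetadataMap`, and `_is_unique`, `_is_single` and `columns_to_drop` are
hypothetical names that do not exist in the code base.

# Stand-ins for MetadataMap.hasUniqueCategoryValues / hasSingleCategoryValue.
def _is_unique(values):
    # every sample has a different value (e.g. BarcodeSequence, Description)
    return len(set(values)) == len(values)

def _is_single(values):
    # every sample has the same value (e.g. LinkerPrimerSequence)
    return len(set(values)) == 1

def columns_to_drop(headers, data, columns, unique=False, single=False):
    """Return the category names that should be removed from the mapping file.

    A column is removed only if it is unique/single-valued and it is not in
    `columns`, the list of categories the user asked to keep.
    """
    to_remove = set()
    for i, name in enumerate(headers[1:], start=1):
        values = [row[i] for row in data]
        if unique and _is_unique(values) and name not in columns:
            to_remove.add(name)
        if single and _is_single(values) and name not in columns:
            to_remove.add(name)
    return to_remove

headers = ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence', 'Treatment']
data = [['PC.354', 'AGCACGAGCCTA', 'YATGCTGCCTCCCGTAGGAGT', 'Control'],
        ['PC.355', 'AACTCGTCGATG', 'YATGCTGCCTCCCGTAGGAGT', 'Control'],
        ['PC.607', 'AACTGTGCGTAC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast']]
# BarcodeSequence is unique-valued but was requested, so only the
# single-valued LinkerPrimerSequence is reported for removal.
print(columns_to_drop(headers, data, ['SampleID', 'BarcodeSequence'],
                      unique=True, single=True))   # -> {'LinkerPrimerSequence'}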