biocore · antgonza · Sep 6, 2014 · Sep 5, 2014 · Sep 5, 2014 · Sep 6, 2014
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -19,6 +19,7 @@ Emperor 0.9.3-dev (changes since Emperor 0.9.2 go here)
 
 * Fixed problem where coordinate files with large values (greater than 100) would not be displayed on screen.
 * Fixed problem that prevented the user from scrolling through the categories in the user interface.
+* Fixed problem that removed unique/single-valued categories in the mapping file even if these were selected with `--color_by`.
 * Clean-up the layout of the user interface so it's cleaner and consistent.
 * Fix problem where long category names would alter the layout of the interface.
 * Fix inability to write an 'E' character in the Filename field when exporting an svg.

diff --git a/emperor/util.py b/emperor/util.py
@@ -146,20 +146,23 @@ def preprocess_mapping_file(data, headers, columns, unique=False, single=False,
             line.append(''.join([line[index] for index in indices]))
         headers.append(new_column)
 
-    # remove all unique or singled valued columns
+    # remove all unique or single-valued columns that are not included in
+    # the list of categories that should be kept, i.e. columns
     if unique or single:
         columns_to_remove = []
         metadata = MetadataMap(mapping_file_to_dict(data, headers), [])
 
         # find columns that have values that are all unique
-        if unique == True:
-            columns_to_remove += [column_name for column_name in headers[1::]
-                if metadata.hasUniqueCategoryValues(column_name)]
+        if unique:
+            for c in headers[1::]:
+                if metadata.hasUniqueCategoryValues(c) and c not in columns:
+                    columns_to_remove.append(c)
 
         # remove categories where there is only one value
-        if single == True:
-            columns_to_remove += [column_name for column_name in headers[1::]
-                if metadata.hasSingleCategoryValue(column_name)]
+        if single:
+            for c in headers[1::]:
+                if metadata.hasSingleCategoryValue(c) and c not in columns:
+                    columns_to_remove.append(c)
         columns_to_remove = list(set(columns_to_remove))
 
         # remove the single or unique columns

diff --git a/tests/test_util.py b/tests/test_util.py
@@ -121,8 +121,8 @@ def test_preprocess_mapping_file(self):
 
         # check it removes columns with unique values
         out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
-            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
-            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            self.mapping_file_headers, ['SampleID', 'LinkerPrimerSequence',
+            'Treatment', 'DOB'],
             unique=True)
         self.assertEquals(out_headers, ['SampleID', 'LinkerPrimerSequence',
             'Treatment', 'DOB'])
@@ -131,7 +131,7 @@ def test_preprocess_mapping_file(self):
         # check it removes columns where there is only one value
         out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
             self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
-            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            'Treatment', 'DOB', 'Description'],
             single=True)
         self.assertEquals(out_headers,['SampleID', 'BarcodeSequence',
             'Treatment', 'DOB', 'Description'])
@@ -151,6 +151,32 @@ def test_preprocess_mapping_file(self):
         self.assertEquals(out_data, MAPPING_FILE_DATA_DUPLICATED)
         self.assertEquals(out_headers, ['SampleID', 'Treatment', 'DOB'])
 
+        # check unique=True removes nothing when every column is in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            unique=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
+
+        # check single=True removes nothing when every column is in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            single=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
+
+        # check unique and single together keep every column named in the list
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            self.mapping_file_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'],
+            unique=True, single=True)
+        self.assertEquals(out_headers, ['SampleID', 'BarcodeSequence',
+            'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)
 
     def test_keep_columns_from_mapping_file(self):
         """Check correct selection of metadata is being done"""
@@ -438,6 +464,20 @@ def test_sanitize_mapping_file(self):
     ['PC.635', 'Fast', 'Fast20080116'],
     ['PC.636', 'Fast', 'Fast20080116']]
 
+MAPPING_FILE_DATA_CAT_G = [['PC.354', 'AGCACGAGCCTA', 'YATGCTGCCTCCCGTAGGAGT',
+'Control', '20061218', 'Control_mouse_I.D._354'], ['PC.355', 'AACTCGTCGATG',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061218', 'Control_mouse_I.D._355'],
+['PC.356', 'ACAGACCACTCA', 'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061126',
+'Control_mouse_I.D._356'], ['PC.481', 'ACCAGCGACTAG', 'YATGCTGCCTCCCGTAGGAGT',
+'Control', '20070314', 'Control_mouse_I.D._481'], ['PC.593', 'AGCAGCACTTGT',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20071210', 'Control_mouse_I.D._593'],
+['PC.607', 'AACTGTGCGTAC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20071112',
+'Fasting_mouse_I.D._607'], ['PC.634', 'ACAGAGTCGGCT', 'YATGCTGCCTCCCGTAGGAGT',
+'Fast', '20080116', 'Fasting_mouse_I.D._634'], ['PC.635', 'ACCGCAGAGTCA',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116', 'Fasting_mouse_I.D._635'],
+['PC.636', 'ACGGTGAGTGTC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116',
+'Fasting_mouse_I.D._636']]
+
 MAPPING_FILE_DATA_GRADIENT = [
     ['PC.354', 'Control','3', '40', 'Control20061218'],
     ['PC.355', 'Control','9', '44', 'Control20061218'],