ENH: Fix problem with --color_by
When nothing was passed to --color_by, the default behaviour was to
keep all the columns in the mapping file. This was inconsistent with the
documented behaviour of removing unique columns, and has now been
corrected. See the comments in this patch.

Fixes #271
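
In practice (a minimal sketch based on the test case added below in tests/test_util.py; mapping_file_data stands in for the mapping fixture used there):

    from emperor.util import preprocess_mapping_file

    # [None] is the sentinel the script now passes when --color_by is omitted;
    # with unique=True, categories whose values are all unique are dropped
    out_data, out_headers = preprocess_mapping_file(
        mapping_file_data,
        ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence',
         'Treatment', 'DOB', 'Description'],
        [None], unique=True)
    # before this patch the script passed the full header list instead of
    # [None], so every category was kept; now out_headers ==
    # ['SampleID', 'LinkerPrimerSequence', 'Treatment', 'DOB']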
ElDeveloper committed Sep 22, 2014
1 parent f9880e2 commit 155444f
Showing 3 changed files with 46 additions and 5 deletions.
31 changes: 27 additions & 4 deletions emperor/util.py
@@ -134,7 +134,8 @@ def preprocess_mapping_file(data, headers, columns, unique=False, single=False,
     # process concatenated columns if needed
     merge = []
     for column in columns:
-        if '&&' in column:
+        # the list can contain None so check "if column" before treating as str
+        if column and '&&' in column:
             merge.append(column)
     # each element needs several columns to be merged
     for new_column in merge:
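
Aside on the guard introduced above: the columns list can now contain the None sentinel, and a bare membership test on None raises, so the truthiness check has to run first. A self-contained illustration:

    column = None                  # the sentinel can end up in the list
    # '&&' in None would raise TypeError ("argument of type 'NoneType' is
    # not iterable"), so the truthiness check must come first
    if column and '&&' in column:  # short-circuits on None, no exception
        print('concatenated column')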
@@ -152,16 +153,38 @@ def preprocess_mapping_file(data, headers, columns, unique=False, single=False,
     columns_to_remove = []
     metadata = MetadataMap(mapping_file_to_dict(data, headers), [])

+    # the --color_by option in the script interface lets the user specify
+    # the categories to use in the generated plot; the default behaviour is
+    # to color by all categories that are not unique. If the user requests a
+    # category with --color_by and that category contains only unique values,
+    # it must still be added, hence the two routes below: (1) no value is
+    # specified in the CLI (the value of columns will be [None, x1, x2, x3]
+    # where x{1,2,3} are categories requested by other CLI options) and
+    # (2) a value is specified in the CLI.
+    #
+    # TL;DR
+    # see https://github.com/biocore/emperor/issues/271
+    if None in columns:
+        columns = headers[:]
+        f_unique = metadata.hasUniqueCategoryValues
+        f_single = metadata.hasSingleCategoryValue
+    else:
+        f_unique = lambda x: metadata.hasUniqueCategoryValues(x) and\
+            x not in columns
+        f_single = lambda x: metadata.hasSingleCategoryValue(x) and\
+            x not in columns

     # find columns that have values that are all unique
     if unique:
         for c in headers[1::]:
-            if metadata.hasUniqueCategoryValues(c) and c not in columns:
+            if f_unique(c):
                 columns_to_remove.append(c)

     # remove categories where there is only one value
     if single:
         for c in headers[1::]:
-            if metadata.hasSingleCategoryValue(c) and c not in columns:
+            if f_single(c):
                 columns_to_remove.append(c)
     columns_to_remove = list(set(columns_to_remove))

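To make route (2) concrete: a unique-valued category that was explicitly requested is exempt from removal, while other unique-valued categories are still dropped. A small sketch using the category names from the test fixture (metadata stands for the MetadataMap built above):

    columns = ['Treatment', 'Description']  # e.g. --color_by Treatment,Description
    f_unique = lambda x: metadata.hasUniqueCategoryValues(x) and\
        x not in columns
    f_unique('Description')       # False -> kept despite being unique-valued
    f_unique('BarcodeSequence')   # True  -> flagged for removal
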
2 changes: 1 addition & 1 deletion scripts/make_emperor.py
@@ -500,7 +500,7 @@ def main():
                 offending_fields.append(col)
     else:
         # if the user didn't specify the header names display everything
-        color_by_column_names = header[:]
+        color_by_column_names = [None]

     # extract a list of the custom axes provided and each element is numeric
     if custom_axes:
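In context, the surrounding logic in main() now works roughly like this (a simplified sketch; opts.color_by stands in for the parsed --color_by value):

    if opts.color_by:
        color_by_column_names = opts.color_by.split(',')
        # ... each requested category is validated against the mapping headers ...
    else:
        # sentinel consumed by preprocess_mapping_file: color by every
        # category that is not unique
        color_by_column_names = [None]
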
18 changes: 18 additions & 0 deletions tests/test_util.py
@@ -178,6 +178,14 @@ def test_preprocess_mapping_file(self):
             'LinkerPrimerSequence', 'Treatment', 'DOB', 'Description'])
         self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_G)

+        # check it removes columns that are unique and keeps all the other ones
+        out_data, out_headers = preprocess_mapping_file(self.mapping_file_data,
+            ['SampleID', 'BarcodeSequence', 'LinkerPrimerSequence',
+            'Treatment', 'DOB', 'Description'], [None], unique=True)
+        self.assertEquals(out_headers, ['SampleID', 'LinkerPrimerSequence',
+            'Treatment', 'DOB'])
+        self.assertEquals(out_data, MAPPING_FILE_DATA_CAT_H)
+
     def test_keep_columns_from_mapping_file(self):
         """Check correct selection of metadata is being done"""

@@ -495,6 +503,16 @@ def test_sanitize_mapping_file(self):
 ['PC.636', 'ACGGTGAGTGTC', 'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116',
 'Fasting_mouse_I.D._636']]

+MAPPING_FILE_DATA_CAT_H = [['PC.354', 'YATGCTGCCTCCCGTAGGAGT', 'Control',
+'20061218'], ['PC.355', 'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061218'],
+['PC.356', 'YATGCTGCCTCCCGTAGGAGT', 'Control', '20061126'], ['PC.481',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20070314'], ['PC.593',
+'YATGCTGCCTCCCGTAGGAGT', 'Control', '20071210'], ['PC.607',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20071112'], ['PC.634',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116'], ['PC.635',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116'], ['PC.636',
+'YATGCTGCCTCCCGTAGGAGT', 'Fast', '20080116']]
+
 MAPPING_FILE_DATA_GRADIENT = [
 ['PC.354', 'Control','3', '40', 'Control20061218'],
 ['PC.355', 'Control','9', '44', 'Control20061218'],
