diff --git a/CHANGELOG.md b/CHANGELOG.md index bdd9cdad..49424cb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ## Version 2022.7.1 Incompatible changes: * Change default join_metadata_fields() inplace parameter to False +* In plot_diff_abundance_enrichment() and plot_enrichment(), replaced the enriched_exp_color parameter with labels_kwargs and numbers_kwargs to enable better control of the barplot labels Bug Fixes: * Fix join_metadata_fields() to use axis='s' by default diff --git a/calour/analysis.py b/calour/analysis.py index 0838c3c7..93e6dd88 100644 --- a/calour/analysis.py +++ b/calour/analysis.py @@ -117,9 +117,11 @@ def correlation(exp: Experiment, field, method='spearman', nonzero=False, transf # remove the nans nanpos = np.where(np.isnan(labels))[0] if len(nanpos) > 0: - logger.warning('NaN values encountered in labels for correlation. Ignoring these samples') labels = np.delete(labels, nanpos) data = np.delete(data, nanpos, axis=1) + logger.warning('NaN values encountered in labels for correlation. Ignoring these samples (%d). %d samples left' % (len(nanpos), len(labels))) + if len(labels) == 0: + raise ValueError('Field %s does not seem to contain any samples with numeric value' % field) # change the method if we have nonzero if nonzero: if method == 'spearman': @@ -233,7 +235,15 @@ def diff_abundance(exp: Experiment, field, val1, val2=None, method='meandiff', t # prepare the labels. 
labels = np.zeros(len(cexp.sample_metadata)) labels[cexp.sample_metadata[field].isin(val1).values] = 1 - logger.info('%d samples with value 1 (%s)' % (np.sum(labels), val1)) + + # check if we have samples left in val1 and val2 + if np.sum(labels) == len(cexp.sample_metadata): + raise ValueError('No samples in field: %s found with val2: %s' % (field, grp2)) + if np.sum(labels) == 0: + raise ValueError('No samples in field: %s found with val1: %s' % (field, grp1)) + + logger.info('%d samples with value 1 (%s), %d samples with value2 (%s)' % (np.sum(labels), grp1, len(cexp.sample_metadata) - np.sum(labels), grp2)) + keep, odif, pvals, qvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method, shuffler=shuffler, random_seed=random_seed) logger.info('number of higher in {}: {}. number of higher in {} : {}. total {}'.format( grp1, np.sum(odif[keep] > 0), grp2, np.sum(odif[keep] < 0), np.sum(keep))) @@ -362,6 +372,10 @@ def diff_abundance_paired(exp: Experiment, pair_field, field, val1, val2=None, t logger.info('Dropping %d values with < 2 samples' % len(drop_values)) exp = exp.filter_samples(pair_field, drop_values, negate=True) + if len(exp.sample_metadata) == 0: + raise ValueError('No samples with >1 value in pair field left') + logger.info('%d samples left after removing group value singletons' % len(exp.sample_metadata)) + # create the groups list for the shuffle function groups = defaultdict(list) for pos, (idx, crow) in enumerate(exp.sample_metadata.iterrows()): diff --git a/calour/filtering.py b/calour/filtering.py index 822e1102..64eaedf0 100644 --- a/calour/filtering.py +++ b/calour/filtering.py @@ -281,7 +281,11 @@ def filter_by_data(exp: Experiment, predicate, axis=1, field=None, if negate is True: select = ~ select - logger.info('After filtering, %s remain.' 
% np.sum(select)) + if axis == 0: + grp_str = 'samples' + else: + grp_str = 'features' + logger.info('After filtering, %s %s remaining.' % (np.sum(select), grp_str)) return exp.reorder(select, axis=axis, inplace=inplace) diff --git a/calour/io.py b/calour/io.py index 8b37799b..1671b2c0 100644 --- a/calour/io.py +++ b/calour/io.py @@ -315,12 +315,12 @@ def _read_metadata(ids, f, kwargs): diff = mid - ids2 if diff: logger.warning('Found %d samples that have metadata but do not have data. These samples have been dropped.' % len(diff)) - logger.info('First 5 samples without data: %r' % diff[:5]) + logger.info('First 5 samples without data: %r' % list(diff)[:5]) logger.debug('These have metadata but do not have data - dropped (%d): %r' % (len(diff), diff)) diff = ids2 - mid if diff: logger.warning('Found %d samples that have data but do not have metadata.' % len(diff)) - logger.info('First 5 samples without metadata: %r' % diff[:5]) + logger.info('First 5 samples without metadata: %r' % list(diff)[:5]) logger.debug('These have data but do not have metadata: %r' % diff) # reorder the id in metadata to align with biom # metadata = metadata.loc[ids, ] diff --git a/calour/plotting.py b/calour/plotting.py index 3de9930d..4eb39613 100644 --- a/calour/plotting.py +++ b/calour/plotting.py @@ -95,9 +95,6 @@ def plot_enrichment(exp: Experiment, enriched, max_show=10, max_len=40, ax=None, name for terms enriched in group1 or group2 respectively, or None to not show legend colors: tuple of (str, str) or None (optional) Colors for terms enriched in group1 or group2 respectively - enriched_exp_color: str or None, optional - If not None, the color to show the number of enriched experiments for each term in the bar. Default is white since the background is the bar color (green/red). - None to not show the enriched experiments count labels_kwargs: dict, optional Additional parameters for the axis ticks labels fonts. 
See matplolib.axes.Axes.set_yticklabels() numbers_kwargs: dict or None, optional @@ -198,11 +195,6 @@ def plot_diff_abundance_enrichment(exp: Experiment, max_show=10, max_len=40, ax= Colors for terms enriched in group1 or group2 respectively show_legend: bool (optional) True to show the color legend, False to hide it - enriched_exp_color: str or None, optional - If not None, the color to show the number of enriched - experiments for each term in the bar. Default is white since - the background is the bar color (green/red). None to not show - the enriched experiments count labels_kwargs: dict, optional Additional parameters to pass to the bar labels rendering. see matplolib.axes.Axes.set_yticklabels() numbers_kwargs: dict or None, optional