Skip to content

Commit

Permalink
Update reporting to diff abundance and filtering (#279)
Browse files Browse the repository at this point in the history
* update changelog and remove old parameter doc

* add tests for the case where no samples have the requested value, and for info printing in the various differential abundance functions

* report which axis (samples or features) was filtered in the filter_by_data() log message

* bugfix in reporting missing samples in _read_metadata()
  • Loading branch information
amnona committed Jul 28, 2022
1 parent 0e2e5f9 commit 8e6b16c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,7 @@
## Version 2022.7.1
Incompatible changes:
* Change default join_metadata_fields() inplace parameter to False
* In plot_diff_abundance_enrichment() and plot_enrichment(), replaced the enriched_exp_color parameter with labels_kwargs and numbers_kwargs to enable better control of the barplot labels

Bug Fixes:
* Fix join_metadata_fields() to use axis='s' by default
Expand Down
18 changes: 16 additions & 2 deletions calour/analysis.py
Expand Up @@ -117,9 +117,11 @@ def correlation(exp: Experiment, field, method='spearman', nonzero=False, transf
# remove the nans
nanpos = np.where(np.isnan(labels))[0]
if len(nanpos) > 0:
logger.warning('NaN values encountered in labels for correlation. Ignoring these samples')
labels = np.delete(labels, nanpos)
data = np.delete(data, nanpos, axis=1)
logger.warning('NaN values encountered in labels for correlation. Ignoring these samples (%d). %d samples left' % (len(nanpos), len(labels)))
if len(labels) == 0:
raise ValueError('Field %s does not seem to contain any samples with numeric value' % field)
# change the method if we have nonzero
if nonzero:
if method == 'spearman':
Expand Down Expand Up @@ -233,7 +235,15 @@ def diff_abundance(exp: Experiment, field, val1, val2=None, method='meandiff', t
# prepare the labels.
labels = np.zeros(len(cexp.sample_metadata))
labels[cexp.sample_metadata[field].isin(val1).values] = 1
logger.info('%d samples with value 1 (%s)' % (np.sum(labels), val1))

# check if we have samples left in val1 and val2
if np.sum(labels) == len(cexp.sample_metadata):
raise ValueError('No samples in field: %s found with val2: %s' % (field, grp2))
if np.sum(labels) == 0:
raise ValueError('No samples in field: %s found with val1: %s' % (field, grp1))

logger.info('%d samples with value 1 (%s), %d samples with value2 (%s)' % (np.sum(labels), grp1, len(cexp.sample_metadata) - np.sum(labels), grp2))

keep, odif, pvals, qvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method, shuffler=shuffler, random_seed=random_seed)
logger.info('number of higher in {}: {}. number of higher in {} : {}. total {}'.format(
grp1, np.sum(odif[keep] > 0), grp2, np.sum(odif[keep] < 0), np.sum(keep)))
Expand Down Expand Up @@ -362,6 +372,10 @@ def diff_abundance_paired(exp: Experiment, pair_field, field, val1, val2=None, t
logger.info('Dropping %d values with < 2 samples' % len(drop_values))
exp = exp.filter_samples(pair_field, drop_values, negate=True)

if len(exp.sample_metadata) == 0:
raise ValueError('No samples with >1 value in pair field left')
logger.info('%d samples left after removing group value singletons' % len(exp.sample_metadata))

# create the groups list for the shuffle function
groups = defaultdict(list)
for pos, (idx, crow) in enumerate(exp.sample_metadata.iterrows()):
Expand Down
6 changes: 5 additions & 1 deletion calour/filtering.py
Expand Up @@ -281,7 +281,11 @@ def filter_by_data(exp: Experiment, predicate, axis=1, field=None,
if negate is True:
select = ~ select

logger.info('After filtering, %s remain.' % np.sum(select))
if axis == 0:
grp_str = 'samples'
else:
grp_str = 'features'
logger.info('After filtering, %s %s remaining.' % (np.sum(select), grp_str))
return exp.reorder(select, axis=axis, inplace=inplace)


Expand Down
4 changes: 2 additions & 2 deletions calour/io.py
Expand Up @@ -315,12 +315,12 @@ def _read_metadata(ids, f, kwargs):
diff = mid - ids2
if diff:
logger.warning('Found %d samples that have metadata but do not have data. These samples have been dropped.' % len(diff))
logger.info('First 5 samples without data: %r' % diff[:5])
logger.info('First 5 samples without data: %r' % list(diff)[:5])
logger.debug('These have metadata but do not have data - dropped (%d): %r' % (len(diff), diff))
diff = ids2 - mid
if diff:
logger.warning('Found %d samples that have data but do not have metadata.' % len(diff))
logger.info('First 5 samples without metadata: %r' % diff[:5])
logger.info('First 5 samples without metadata: %r' % list(diff)[:5])
logger.debug('These have data but do not have metadata: %r' % diff)
# reorder the id in metadata to align with biom
# metadata = metadata.loc[ids, ]
Expand Down
8 changes: 0 additions & 8 deletions calour/plotting.py
Expand Up @@ -95,9 +95,6 @@ def plot_enrichment(exp: Experiment, enriched, max_show=10, max_len=40, ax=None,
name for terms enriched in group1 or group2 respectively, or None to not show legend
colors: tuple of (str, str) or None (optional)
Colors for terms enriched in group1 or group2 respectively
enriched_exp_color: str or None, optional
If not None, the color to show the number of enriched experiments for each term in the bar. Default is white since the background is the bar color (green/red).
None to not show the enriched experiments count
labels_kwargs: dict, optional
Additional parameters for the axis tick label fonts. See matplotlib.axes.Axes.set_yticklabels()
numbers_kwargs: dict or None, optional
Expand Down Expand Up @@ -198,11 +195,6 @@ def plot_diff_abundance_enrichment(exp: Experiment, max_show=10, max_len=40, ax=
Colors for terms enriched in group1 or group2 respectively
show_legend: bool (optional)
True to show the color legend, False to hide it
enriched_exp_color: str or None, optional
If not None, the color to show the number of enriched
experiments for each term in the bar. Default is white since
the background is the bar color (green/red). None to not show
the enriched experiments count
labels_kwargs: dict, optional
Additional parameters to pass to the bar label rendering. See matplotlib.axes.Axes.set_yticklabels()
numbers_kwargs: dict or None, optional
Expand Down

0 comments on commit 8e6b16c

Please sign in to comment.