Skip to content

Commit

Permalink
Update functions in manipulation.py (#273)
Browse files Browse the repository at this point in the history
* fix join_metadata() and version bump

* change join_metadata_fields() default inplace parameter to False

* change join_experiments() to update md5 fields

* change join_experiments() and join_experiments_featurewise() to work better with the field parameter
  • Loading branch information
amnona committed Jul 25, 2022
1 parent 7f1ea2b commit 0c4b379
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 15 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,7 +2,14 @@


## Version 2022.7.1
Incompatible changes:
* Change default join_metadata_fields() inplace parameter to False

Bug Fixes:
* Fix join_metadata_fields() to use axis='s' by default
* Fix join_experiments() to update exp.info['sample_metadata_md5'] and exp.info['data_md5']
* Fix join_experiments() to make field parameter optional, enable joining when field already exists in the experiment, and update the doc
* Fix join_experiments_featurewise() to make field parameter optional, enable joining when field already exists in the experiment, and update the doc
* Fix join_metadata() to use axis='s' by default
* Fix experiment read functions to show by default only the summary and first 5 samples without data/without metadata

Expand Down
42 changes: 27 additions & 15 deletions calour/manipulation.py
Expand Up @@ -31,7 +31,7 @@
import numpy as np

from .experiment import Experiment
from .util import join_fields
from .util import join_fields, get_data_md5


logger = getLogger(__name__)
Expand Down Expand Up @@ -75,7 +75,7 @@ def chain(exp: Experiment, steps=[], inplace=False, **kwargs) -> Experiment:


def join_metadata_fields(exp: Experiment, field1, field2, new_field=None,
axis=0, inplace=True, **kwargs) -> Experiment:
axis=0, inplace=False, **kwargs) -> Experiment:
'''Join 2 fields in sample or feature metadata into 1.
Parameters
Expand Down Expand Up @@ -199,7 +199,7 @@ def aggregate_by_metadata(exp: Experiment, field, agg='mean', axis=0, inplace=Fa
return exp


def join_experiments(exp: Experiment, other, field, labels=('exp', 'other'), prefixes=None) -> Experiment:
def join_experiments(exp: Experiment, other, field='_calour_original_experiment', labels=('exp', 'other'), prefixes=None) -> Experiment:
'''Combine two :class:`.Experiment` objects into one.
This assumes the same feature in the 2 joining experiments has
Expand All @@ -216,9 +216,10 @@ def join_experiments(exp: Experiment, other, field, labels=('exp', 'other'), pre
from exp and not from other.
field : None or str
Name of the new ``sample_metadata`` field containing the experiment each sample is coming from.
If it is None, don't add such column. The values in this column will be "exp" and "other".
The values in this column are supplied in the labels parameter.
If it is None, don't add such column.
labels : tuple of (str, str)
Only used if `field` is not `None`. Label which experiments each sample is from.
Only used if `field` is not `None`. Label which experiment each sample is from.
prefixes : tuple of (str, str), optional
Prefix to prepend to the sample_metadata index for identical samples in the 2 experiments.
Required only if the two experiments share any identical sample ID.
Expand Down Expand Up @@ -273,12 +274,16 @@ def join_experiments(exp: Experiment, other, field, labels=('exp', 'other'), pre
all_data[len(smd1):len(smd), idx] = other.get_data(sparse=False)
newexp.data = all_data

# update the md5 hashes for the joined experiment
newexp.info['data_md5'] = get_data_md5(newexp.data)
newexp.info['sample_metadata_md5'] = exp.info['sample_metadata_md5']

# validate the combined experiment at last
newexp.validate()
return newexp


def join_experiments_featurewise(exp: Experiment, other, field, labels=('exp', 'other'), prefixes=None) -> Experiment:
def join_experiments_featurewise(exp: Experiment, other, field='_calour_original_experiment', labels=('exp', 'other'), prefixes=None) -> Experiment:
'''Combine two :class:`.Experiment` objects into one.
An example of use cases is to combine the 16S and ITS amplicon
Expand All @@ -292,11 +297,12 @@ def join_experiments_featurewise(exp: Experiment, other, field, labels=('exp', '
----------
other : :class:`.Experiment`
The ``Experiment`` object to combine with the current one.
field : ``None`` or str
Name of the new ``feature_metdata`` field containing the experiment each feature is coming from.
If it is None, don't add such column. The values in this column will be "exp" and "other".
field : None or str
Name of the new ``feature_metadata`` field containing the experiment each feature is coming from.
The values in this column are supplied in the labels parameter.
If it is None, don't add such column.
labels : tuple of (str, str)
Only used if `field` is not `None`. Label which experiments each features is from.
Only used if `field` is not `None`. Label which experiment each feature is from.
prefixes : tuple of (str, str), optional
Prefix to prepend to the feature_metadata index for identical feature IDs in the 2 experiments.
Required only if the two experiments share any identical feature ID.
Expand Down Expand Up @@ -346,9 +352,15 @@ def _check_id_overlap_then_concat(df1, df2, prefixes, field, labels):

df = pd.concat([df1, df2], join='outer')
if field is not None:
if field in df.columns:
raise ValueError(
'Column name %s already exists in the metadata - '
'please give a different name' % field)
df[field] = [labels[0]] * df1.shape[0] + [labels[1]] * df2.shape[0]
if field in df1.columns:
vals1 = list(df1[field].values)
else:
vals1 = [labels[0]] * df1.shape[0]

if field in df2.columns:
vals2 = list(df2[field].values)
else:
vals2 = [labels[1]] * df2.shape[0]

df[field] = vals1 + vals2
return df

0 comments on commit 0c4b379

Please sign in to comment.