From 4812eb869482576e51a05cd2ca9345790b69cab6 Mon Sep 17 00:00:00 2001 From: Zech Xu Date: Fri, 3 Jul 2020 11:37:57 +0800 Subject: [PATCH 1/3] clean up code and doc --- calour/amplicon_experiment.py | 2 +- calour/experiment.py | 23 ++++++++++++----------- calour/heatmap/heatmap.py | 2 +- calour/ms1_experiment.py | 2 +- calour/transforming.py | 4 +++- calour/util.py | 15 ++++++++++----- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/calour/amplicon_experiment.py b/calour/amplicon_experiment.py index 59da6bfa..811eeac8 100644 --- a/calour/amplicon_experiment.py +++ b/calour/amplicon_experiment.py @@ -73,7 +73,7 @@ class AmpliconExperiment(Experiment): ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.heatmap_databases = ('dbbact',) + self.databases = ('dbbact',) def filter_taxonomy(exp: Experiment, values, negate=False, inplace=False, substring=True): '''filter keeping only observations with taxonomy string matching taxonomy diff --git a/calour/experiment.py b/calour/experiment.py index b7bdd5f2..3a2124a9 100644 --- a/calour/experiment.py +++ b/calour/experiment.py @@ -34,11 +34,11 @@ class Experiment: - '''This class contains the data for a experiment or a meta experiment. + '''This class contains the data for an experiment or a meta-experiment. - The data set includes a data table (otu table, gene table, - metabolomic table, or all those tables combined), a sample - metadata table, and a feature metadata. + The data set includes 3 aligned tables: a data table (otu table, + gene table, metabolomic table, or all those tables combined), a + sample metadata table, and a feature metadata table. 
Parameters ---------- @@ -79,8 +79,9 @@ class Experiment: See Also -------- AmpliconExperiment + MS1Experiment ''' - def __init__(self, data, sample_metadata, feature_metadata=None, + def __init__(self, data, sample_metadata, feature_metadata=None, databases=(), exp_metadata=None, description='', sparse=True): self.data = data self.sample_metadata = sample_metadata @@ -94,14 +95,18 @@ def __init__(self, data, sample_metadata, feature_metadata=None, self.normalized = 0 # the function calling history list self._call_history = [] - # whether to log to history + # whether to log to calling history self._log = True # flag if data array is sparse (True) or dense (False) self.sparse = sparse # the default databases to use for feature information - self.heatmap_databases = () + self.databases = databases + + @property + def shape(self): + return self.data.shape @property def sparse(self): @@ -288,10 +293,6 @@ def get_data(self, sparse=None, copy=False): else: return self.data - @property - def shape(self): - return self.data.shape - def reorder(self, new_order, axis=0, inplace=False): '''Reorder according to indices in the new order. 
diff --git a/calour/heatmap/heatmap.py b/calour/heatmap/heatmap.py index d0cac6ae..3b904389 100644 --- a/calour/heatmap/heatmap.py +++ b/calour/heatmap/heatmap.py @@ -544,7 +544,7 @@ def plot(exp: Experiment, title=None, ''' # set the databases if default requested if databases is None: - databases = exp.heatmap_databases + databases = exp.databases if tree is None: gui_obj = _create_plot_gui(exp, gui, databases) diff --git a/calour/ms1_experiment.py b/calour/ms1_experiment.py index f7985481..af05bf62 100644 --- a/calour/ms1_experiment.py +++ b/calour/ms1_experiment.py @@ -70,7 +70,7 @@ class MS1Experiment(Experiment): ''' def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.heatmap_databases = ('gnps',) + self.databases = ('gnps',) def __repr__(self): '''Return a string representation of this object.''' diff --git a/calour/transforming.py b/calour/transforming.py index a903d16a..5c411ae5 100644 --- a/calour/transforming.py +++ b/calour/transforming.py @@ -110,12 +110,14 @@ def rescale(exp: Experiment, total=10000, axis=0, inplace=False): def scale(exp: Experiment, axis=0, inplace=False): '''Standardize a dataset along an axis + This transforms the data into zero mean and unit variance. + .. warning:: It will convert the ``Experiment.data`` from the sparse matrix to dense array. Parameters ---------- axis : 0, 1, 's', or 'f' - 0 or 's' means scaling occur sample-wise; 1 or 'f' feature-wise. + 0 or 's' means scaling occurs sample-wise; 1 or 'f' feature-wise. Returns ------- diff --git a/calour/util.py b/calour/util.py index af4ca394..dd0f33a6 100644 --- a/calour/util.py +++ b/calour/util.py @@ -388,7 +388,7 @@ def _to_list(x): def _argsort(values, reverse=False): '''Sort a sequence of values of heterogeneous variable types. 
- Used to overcome the problem when using numpy.argsort on a pandas + This is used to overcome the problem when using numpy.argsort on a pandas series values with missing values Examples @@ -447,13 +447,18 @@ def _clone_function(f): def register_functions(cls, modules=None): '''Dynamically register functions to the class as methods. + This searches all the public functions defined in the given + ``modules``. If a function has its 1st argument of ``cls`` type, + it will be registered to the ``cls`` class as a method. + Parameters ---------- cls : ``class`` object - The class that the functions will be added to + The class that functions will be added to as methods. modules : iterable of str, optional - The module names where the functions are defined. ``None`` means all public + The module names where functions are defined. ``None`` means all public modules in `calour`. + ''' # pattern to recognize the Parameters section p = re.compile(r"(\n +Parameters\n +-+ *)") @@ -480,7 +485,7 @@ def register_functions(cls, modules=None): updated = ('\n .. note:: This function is also available as a class method :meth:`.{0}.{1}`\n' '\\1' '\n exp : {0}' - '\n Input experiment object.' + '\n Input Experiment object.' '\n') if not f.__doc__: @@ -495,7 +500,7 @@ def deprecated(message): Parameters ---------- message : str - the message to print together with deprecation info. + the message to print together with deprecation warning. 
''' def deprecated_decorator(func): @wraps(func) From 58848de9d47baf222431f279bc9ee3fbc504b775 Mon Sep 17 00:00:00 2001 From: Zech Xu Date: Fri, 3 Jul 2020 12:19:57 +0800 Subject: [PATCH 2/3] add Experiment.validate() --- calour/experiment.py | 28 +++++++++++++++++++++++++--- calour/tests/test_experiment.py | 11 +++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/calour/experiment.py b/calour/experiment.py index 3a2124a9..50880b10 100644 --- a/calour/experiment.py +++ b/calour/experiment.py @@ -64,7 +64,7 @@ class Experiment: The metadata on the samples feature_metadata : pandas.DataFrame The metadata on the features - exp_metadata : dict + metadata : dict metadata about the experiment (data md5, filenames, etc.) shape : tuple of (int, int) the dimension of data @@ -90,6 +90,7 @@ def __init__(self, data, sample_metadata, feature_metadata=None, databases=(), self.feature_metadata = feature_metadata if exp_metadata is None: exp_metadata = {} + self.validate() self.exp_metadata = exp_metadata self.description = description self.normalized = 0 @@ -104,6 +105,27 @@ def __init__(self, data, sample_metadata, feature_metadata=None, databases=(), # the default databases to use for feature information self.databases = databases + def validate(self): + '''Validate the Experiment object. + + This checks that the shape of the data table agrees with the + row counts of sample_metadata and feature_metadata. + + Raises + ------ + ValueError + If the shapes of the 3 tables do not agree. + ''' + n_sample, n_feature = self.data.shape + ns = self.sample_metadata.shape[0] + nf = self.feature_metadata.shape[0] + if n_sample != ns: + raise ValueError( + 'data table must have the same number of samples with sample_metadata table (%d != %d).' % (n_sample, ns)) + if n_feature != nf: + raise ValueError( + 'data table must have the same number of features with feature_metadata table (%d != %d).' 
% (n_feature, nf)) + @property def shape(self): return self.data.shape @@ -152,7 +174,7 @@ def __ne__(self, other): return not (self == other) def __getitem__(self, pos): - '''Get the abundance at (sampleid, featureid) + '''Get the value from data table for (sample_id, feature_id) Parameters ---------- @@ -162,7 +184,7 @@ def __getitem__(self, pos): Returns ------- float - The abundance of feature ID in sample ID + The value of feature ID in sample ID ''' if not isinstance(pos, tuple) or len(pos) != 2: raise SyntaxError('Must supply sample ID, feature ID') diff --git a/calour/tests/test_experiment.py b/calour/tests/test_experiment.py index 9e6c101a..9b4a232c 100644 --- a/calour/tests/test_experiment.py +++ b/calour/tests/test_experiment.py @@ -235,6 +235,17 @@ def test_getitem_slice(self): def test_repr(self): self.assertEqual(repr(self.test1), 'Experiment ("test1.biom") with 21 samples, 12 features') + def test_validate_sample(self): + with self.assertRaises(ValueError, msg='data table must have the same number of samples with sample_metadata table (2 != 1)'): + ca.Experiment(np.array([[1, 2], [3, 4]]), + sample_metadata=pd.DataFrame({'foo': ['a'], 'spam': ['A']})) + + def test_validate_feature(self): + with self.assertRaises(ValueError, msg='data table must have the same number of features with feature_metadata table (2 != 1)'): + ca.Experiment(np.array([[1, 2], [3, 4]]), + sample_metadata=pd.DataFrame({'foo': ['a', 'b'], 'spam': ['A', 'B']}), + feature_metadata=pd.DataFrame({'ph': [7]})) + if __name__ == "__main__": main() From 70920ca5fba0af807f2d44a2862fa6803e32a3d7 Mon Sep 17 00:00:00 2001 From: Zech Xu Date: Sat, 4 Jul 2020 01:08:08 +0800 Subject: [PATCH 3/3] remove init from Experiment child class --- calour/amplicon_experiment.py | 14 ++++++++------ calour/experiment.py | 5 ++++- calour/ms1_experiment.py | 4 ---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/calour/amplicon_experiment.py b/calour/amplicon_experiment.py index 
811eeac8..0a6a90ec 100644 --- a/calour/amplicon_experiment.py +++ b/calour/amplicon_experiment.py @@ -35,12 +35,12 @@ class AmpliconExperiment(Experiment): '''This class stores amplicon data and associated metadata. - This is a child class of :class:`.Experiment` + This is a child class of :class:`.Experiment`. Parameters ---------- data : numpy.ndarray or scipy.sparse.csr_matrix - The abundance table for OTUs, metabolites, genes, etc. Samples + The abundance table for OTUs or ASVs. Samples are in row and features in column sample_metadata : pandas.DataFrame The metadata on the samples @@ -55,7 +55,7 @@ class AmpliconExperiment(Experiment): Attributes ---------- data : numpy.ndarray or scipy.sparse.csr_matrix - The abundance table for OTUs, metabolites, genes, etc. Samples + The abundance table for OTUs or ASVs. Samples are in row and features in column sample_metadata : pandas.DataFrame The metadata on the samples @@ -69,11 +69,13 @@ class AmpliconExperiment(Experiment): store the data as sparse matrix (scipy.sparse.csr_matrix) or dense numpy array. description : str name of the experiment + databases : iterable of str + databases for fetching and entering feature annotations + See Also + -------- + Experiment ''' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.databases = ('dbbact',) def filter_taxonomy(exp: Experiment, values, negate=False, inplace=False, substring=True): '''filter keeping only observations with taxonomy string matching taxonomy diff --git a/calour/experiment.py b/calour/experiment.py index 50880b10..e44ba26f 100644 --- a/calour/experiment.py +++ b/calour/experiment.py @@ -75,6 +75,8 @@ class Experiment: the normalization factor. 
it is zero if not normalized description : str name of the experiment + databases : iterable of str + databases for fetching and entering feature annotations See Also -------- @@ -125,10 +127,11 @@ def validate(self): if n_feature != nf: raise ValueError( 'data table must have the same number of features with feature_metadata table (%d != %d).' % (n_feature, nf)) + return ns, nf @property def shape(self): - return self.data.shape + return self.validate() @property def sparse(self): diff --git a/calour/ms1_experiment.py b/calour/ms1_experiment.py index af05bf62..74579578 100644 --- a/calour/ms1_experiment.py +++ b/calour/ms1_experiment.py @@ -68,10 +68,6 @@ class MS1Experiment(Experiment): -------- Experiment ''' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.databases = ('gnps',) - def __repr__(self): '''Return a string representation of this object.''' return 'MS1Experiment %s with %d samples, %d features' % (