Skip to content

Commit

Permalink
Merge 70920ca into fd40a28
Browse files Browse the repository at this point in the history
  • Loading branch information
RNAer committed Jul 3, 2020
2 parents fd40a28 + 70920ca commit 6f539f6
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 31 deletions.
14 changes: 8 additions & 6 deletions calour/amplicon_experiment.py
Expand Up @@ -35,12 +35,12 @@
class AmpliconExperiment(Experiment):
'''This class stores amplicon data and associated metadata.
This is a child class of :class:`.Experiment`
This is a child class of :class:`.Experiment`.
Parameters
----------
data : numpy.ndarray or scipy.sparse.csr_matrix
The abundance table for OTUs, metabolites, genes, etc. Samples
The abundance table for OTUs or ASVs. Samples
are in rows and features in columns
sample_metadata : pandas.DataFrame
The metadata on the samples
Expand All @@ -55,7 +55,7 @@ class AmpliconExperiment(Experiment):
Attributes
----------
data : numpy.ndarray or scipy.sparse.csr_matrix
The abundance table for OTUs, metabolites, genes, etc. Samples
The abundance table for OTUs or ASVs. Samples
are in rows and features in columns
sample_metadata : pandas.DataFrame
The metadata on the samples
Expand All @@ -69,11 +69,13 @@ class AmpliconExperiment(Experiment):
store the data as sparse matrix (scipy.sparse.csr_matrix) or dense numpy array.
description : str
name of the experiment
databases : iterable of str
databases for fetching and entering feature annotations
See Also
--------
Experiment
'''
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.heatmap_databases = ('dbbact',)

def filter_taxonomy(exp: Experiment, values, negate=False, inplace=False, substring=True):
'''filter keeping only observations with taxonomy string matching taxonomy
Expand Down
54 changes: 40 additions & 14 deletions calour/experiment.py
Expand Up @@ -34,11 +34,11 @@


class Experiment:
'''This class contains the data for a experiment or a meta experiment.
'''This class contains the data for an experiment or a meta-experiment.
The data set includes a data table (otu table, gene table,
metabolomic table, or all those tables combined), a sample
metadata table, and a feature metadata.
The data set includes 3 aligned tables: a data table (otu table,
gene table, metabolomic table, or all those tables combined), a
sample metadata table, and a feature metadata table.
Parameters
----------
Expand All @@ -64,7 +64,7 @@ class Experiment:
The metadata on the samples
feature_metadata : pandas.DataFrame
The metadata on the features
exp_metadata : dict
metadata : dict
metadata about the experiment (data md5, filenames, etc.)
shape : tuple of (int, int)
the dimension of data
Expand All @@ -75,12 +75,15 @@ class Experiment:
the normalization factor. it is zero if not normalized
description : str
name of the experiment
databases : iterable of str
databases for fetching and entering feature annotations
See Also
--------
AmpliconExperiment
MS1Experiment
'''
def __init__(self, data, sample_metadata, feature_metadata=None,
def __init__(self, data, sample_metadata, feature_metadata=None, databases=(),
exp_metadata=None, description='', sparse=True):
self.data = data
self.sample_metadata = sample_metadata
Expand All @@ -89,19 +92,46 @@ def __init__(self, data, sample_metadata, feature_metadata=None,
self.feature_metadata = feature_metadata
if exp_metadata is None:
exp_metadata = {}
self.validate()
self.exp_metadata = exp_metadata
self.description = description
self.normalized = 0
# the function calling history list
self._call_history = []
# whether to log to history
# whether to log to calling history
self._log = True

# flag if data array is sparse (True) or dense (False)
self.sparse = sparse

# the default databases to use for feature information
self.heatmap_databases = ()
self.databases = databases

def validate(self):
    '''Validate the Experiment object.

    This checks that the data table is 2-dimensional and that its
    shape agrees with sample_metadata and feature_metadata.

    Returns
    -------
    tuple of (int, int)
        The number of samples and the number of features, taken from
        the row counts of sample_metadata and feature_metadata.

    Raises
    ------
    ValueError
        If the data table is not 2-dimensional, or if the shapes of
        the 3 tables do not agree.
    '''
    data_shape = self.data.shape
    # Unpacking a non-2D shape into two names would fail with an opaque
    # "not enough/too many values to unpack" error; report it explicitly.
    if len(data_shape) != 2:
        raise ValueError(
            'data table must be 2-dimensional (got %d dimension(s)).' % len(data_shape))
    n_sample, n_feature = data_shape
    ns = self.sample_metadata.shape[0]
    nf = self.feature_metadata.shape[0]
    if n_sample != ns:
        raise ValueError(
            'data table must have the same number of samples with sample_metadata table (%d != %d).' % (n_sample, ns))
    if n_feature != nf:
        raise ValueError(
            'data table must have the same number of features with feature_metadata table (%d != %d).' % (n_feature, nf))
    return ns, nf

@property
def shape(self):
'''The dimension of the data as (number of samples, number of features).
The value is whatever :meth:`validate` returns, so reading this
property re-checks that the data table agrees with sample_metadata
and feature_metadata.
Returns
-------
tuple of (int, int)
The number of samples and the number of features.
Raises
------
ValueError
If the shapes of the 3 tables do not agree.
'''
return self.validate()

@property
def sparse(self):
Expand Down Expand Up @@ -147,7 +177,7 @@ def __ne__(self, other):
return not (self == other)

def __getitem__(self, pos):
'''Get the abundance at (sampleid, featureid)
'''Get the value from data table for (sample_id, feature_id)
Parameters
----------
Expand All @@ -157,7 +187,7 @@ def __getitem__(self, pos):
Returns
-------
float
The abundance of feature ID in sample ID
The value of feature ID in sample ID
'''
if not isinstance(pos, tuple) or len(pos) != 2:
raise SyntaxError('Must supply sample ID, feature ID')
Expand Down Expand Up @@ -288,10 +318,6 @@ def get_data(self, sparse=None, copy=False):
else:
return self.data

@property
def shape(self):
return self.data.shape

def reorder(self, new_order, axis=0, inplace=False):
'''Reorder according to indices in the new order.
Expand Down
2 changes: 1 addition & 1 deletion calour/heatmap/heatmap.py
Expand Up @@ -544,7 +544,7 @@ def plot(exp: Experiment, title=None,
'''
# set the databases if default requested
if databases is None:
databases = exp.heatmap_databases
databases = exp.databases

if tree is None:
gui_obj = _create_plot_gui(exp, gui, databases)
Expand Down
4 changes: 0 additions & 4 deletions calour/ms1_experiment.py
Expand Up @@ -68,10 +68,6 @@ class MS1Experiment(Experiment):
--------
Experiment
'''
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.heatmap_databases = ('gnps',)

def __repr__(self):
'''Return a string representation of this object.'''
return 'MS1Experiment %s with %d samples, %d features' % (
Expand Down
11 changes: 11 additions & 0 deletions calour/tests/test_experiment.py
Expand Up @@ -235,6 +235,17 @@ def test_getitem_slice(self):
def test_repr(self):
# repr() of the test1 fixture must match this summary string exactly
self.assertEqual(repr(self.test1), 'Experiment ("test1.biom") with 21 samples, 12 features')

def test_validate_sample(self):
    '''Constructing an Experiment fails when data has more samples than sample_metadata.'''
    # assertRaises(..., msg=...) only sets the message shown when the
    # assertion fails; it never inspects the raised exception's text.
    # assertRaisesRegex actually checks the error message.
    expected = (r'data table must have the same number of samples '
                r'with sample_metadata table \(2 != 1\)')
    with self.assertRaisesRegex(ValueError, expected):
        ca.Experiment(np.array([[1, 2], [3, 4]]),
                      sample_metadata=pd.DataFrame({'foo': ['a'], 'spam': ['A']}))

def test_validate_feature(self):
    '''Constructing an Experiment fails when data has more features than feature_metadata.'''
    # assertRaises(..., msg=...) only sets the message shown when the
    # assertion fails; it never inspects the raised exception's text.
    # assertRaisesRegex actually checks the error message.
    expected = (r'data table must have the same number of features '
                r'with feature_metadata table \(2 != 1\)')
    with self.assertRaisesRegex(ValueError, expected):
        ca.Experiment(np.array([[1, 2], [3, 4]]),
                      sample_metadata=pd.DataFrame({'foo': ['a', 'b'], 'spam': ['A', 'B']}),
                      feature_metadata=pd.DataFrame({'ph': [7]}))


# call main() only when this file is executed directly, not when it is imported
# (presumably unittest's main; its import is not visible in this view)
if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion calour/transforming.py
Expand Up @@ -110,12 +110,14 @@ def rescale(exp: Experiment, total=10000, axis=0, inplace=False):
def scale(exp: Experiment, axis=0, inplace=False):
'''Standardize a dataset along an axis
This transforms the data into zero mean and unit variance.
.. warning:: It will convert the ``Experiment.data`` from the sparse matrix to dense array.
Parameters
----------
axis : 0, 1, 's', or 'f'
0 or 's' means scaling occur sample-wise; 1 or 'f' feature-wise.
0 or 's' means scaling occurs sample-wise; 1 or 'f' feature-wise.
Returns
-------
Expand Down
15 changes: 10 additions & 5 deletions calour/util.py
Expand Up @@ -388,7 +388,7 @@ def _to_list(x):
def _argsort(values, reverse=False):
'''Sort a sequence of values of heterogeneous variable types.
Used to overcome the problem when using numpy.argsort on a pandas
This is used to overcome the problem when using numpy.argsort on a pandas
series values with missing values
Examples
Expand Down Expand Up @@ -447,13 +447,18 @@ def _clone_function(f):
def register_functions(cls, modules=None):
'''Dynamically register functions to the class as methods.
This searches all the public functions defined in the given
``modules``. If the 1st argument of a function is of ``cls`` type,
that function will be registered to the ``cls`` class as a method.
Parameters
----------
cls : ``class`` object
The class that the functions will be added to
The class that functions will be added to as methods.
modules : iterable of str, optional
The module names where the functions are defined. ``None`` means all public
The module names where functions are defined. ``None`` means all public
modules in `calour`.
'''
# pattern to recognize the Parameters section
p = re.compile(r"(\n +Parameters\n +-+ *)")
Expand All @@ -480,7 +485,7 @@ def register_functions(cls, modules=None):
updated = ('\n .. note:: This function is also available as a class method :meth:`.{0}.{1}`\n'
'\\1'
'\n exp : {0}'
'\n Input experiment object.'
'\n Input Experiment object.'
'\n')

if not f.__doc__:
Expand All @@ -495,7 +500,7 @@ def deprecated(message):
Parameters
----------
message : str
the message to print together with deprecation info.
the message to print together with deprecation warning.
'''
def deprecated_decorator(func):
@wraps(func)
Expand Down

0 comments on commit 6f539f6

Please sign in to comment.