From 422648d0771c4dc62a93c7994e23693c8ee02b0b Mon Sep 17 00:00:00 2001
From: Zech Xu
Date: Fri, 23 Mar 2018 22:47:19 -0700
Subject: [PATCH] add a couple of functions (#88)

* add dropna function

* merge dropna into filter_by_metadata;
  keep only the needed kwargs using docrep;
  allow set in _to_list()

* fix plot_shareness()

* add join_experiments_featurewise() and fix plot_core_features();
  fix other minor things

* fix an edge-case bug when the function param is a tuple

* rename doc_init to _doc and fix pep8

* add docstrings
---
 calour/__init__.py                |  4 +-
 calour/{doc_init.py => _doc.py}   |  0
 calour/amplicon_experiment.py     | 28 +++++++++++++
 calour/experiment.py              |  3 +-
 calour/filtering.py               |  8 ++--
 calour/heatmap/heatmap.py         |  4 +-
 calour/heatmap/plotgui.py         |  2 +-
 calour/heatmap/plotgui_cli.py     |  2 +-
 calour/heatmap/plotgui_jupyter.py |  4 +-
 calour/heatmap/plotgui_qt5.py     |  2 +-
 calour/io.py                      |  7 +---
 calour/manipulation.py            | 47 ++++++++++++++++++++++
 calour/plotting.py                | 66 ++++++++++++++++++++-----------
 calour/sorting.py                 |  2 +-
 calour/tests/test_manipulation.py | 25 +++++++++++-
 calour/tests/test_plotting.py     | 15 ++++---
 calour/tests/test_training.py     | 16 ++++++--
 calour/training.py                | 62 ++++++++++++++++++++++++++---
 calour/transforming.py            |  5 ++-
 calour/util.py                    |  3 ++
 20 files changed, 246 insertions(+), 59 deletions(-)
 rename calour/{doc_init.py => _doc.py} (100%)

diff --git a/calour/__init__.py b/calour/__init__.py
index 302adb18..e3db53f7 100644
--- a/calour/__init__.py
+++ b/calour/__init__.py
@@ -34,7 +34,5 @@
 for fn, f in inspect.getmembers(Experiment, predicate=inspect.isfunction):
     setattr(Experiment, fn, _convert_axis_name(f))
 
-log = resource_filename(__package__, 'log.cfg')
-
 # setting False allows other loggers to print logs.
-fileConfig(log, disable_existing_loggers=False)
+fileConfig(resource_filename(__package__, 'log.cfg'), disable_existing_loggers=False)
diff --git a/calour/doc_init.py b/calour/_doc.py
similarity index 100%
rename from calour/doc_init.py
rename to calour/_doc.py
diff --git a/calour/amplicon_experiment.py b/calour/amplicon_experiment.py
index d43c8201..05ec00d6 100644
--- a/calour/amplicon_experiment.py
+++ b/calour/amplicon_experiment.py
@@ -254,3 +254,31 @@ def split_taxonomy(self, field='taxonomy', sep=';',
             the column names for the new columns split from ``field``
         '''
         self.feature_metadata[names] = self.feature_metadata[field].str.split(sep, expand=True)
+        # return so you can chain the functions
+        return self
+
+    def find_lowest_taxonomy(self, field='taxonomy', new_field='taxa'):
+        '''Create a new column that contains the taxonomy of the lowest annotated level.
+
+        For example, 'k__Bacteria; p__Firmicutes; c__Bacilli;
+        o__Lactobacillales; f__Enterococcaceae; g__Enterococcus;
+        s__' will return 'g__Enterococcus'
+
+        Parameters
+        ----------
+        field : str
+            column name that contains all levels of taxonomy
+        new_field : str
+            new column name
+
+        Returns
+        -------
+        AmpliconExperiment
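+
+        Examples
+        --------
+        A minimal sketch of the expected behavior (``exp`` here is a
+        hypothetical experiment whose 'taxonomy' column holds
+        Greengenes-style strings):
+
+        >>> exp.feature_metadata['taxonomy'].iloc[0]  # doctest: +SKIP
+        'k__Bacteria; p__Firmicutes; g__Enterococcus; s__'
+        >>> exp.find_lowest_taxonomy().feature_metadata['taxa'].iloc[0]  # doctest: +SKIP
+        'g__Enterococcus'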
+
+        '''
+        def find_lowest(s):
+            # strip whitespace so empty levels like ' s__' are excluded
+            levels = [i.strip() for i in s.split(';')]
+            # keep only the levels annotated beyond the bare 'x__' prefix
+            kept = [i for i in levels if len(i) > 3]
+            return kept[-1]
+        self.feature_metadata[new_field] = self.feature_metadata[field].apply(find_lowest)
+        return self
diff --git a/calour/experiment.py b/calour/experiment.py
index 52980b83..42bdb09c 100644
--- a/calour/experiment.py
+++ b/calour/experiment.py
@@ -227,7 +227,8 @@ def inner(*args, **kwargs):
             logger.debug('Run func {}'.format(fn))
             new_exp = func(*args, **kwargs)
             if exp._log is True:
-                param = ['%r' % i for i in args[1:]] + ['%s=%r' % (k, v) for k, v in kwargs.items()]
+                # do not use `'%r' % i` because it raises an error when i is a tuple
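+                # for example: '%r' % (1, 2) raises "TypeError: not all
+                # arguments converted during string formatting", while
+                # '{!r}'.format((1, 2)) gives "(1, 2)" as intended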
+                param = ['{!r}'.format(i) for i in args[1:]] + ['{0!s}={1!r}'.format(k, v) for k, v in kwargs.items()]
                 param = ', '.join(param)
                 new_exp._call_history.append('{0}({1})'.format(fn, param))
                 exp._log = False
diff --git a/calour/filtering.py b/calour/filtering.py
index bb439380..045d65a7 100644
--- a/calour/filtering.py
+++ b/calour/filtering.py
@@ -36,9 +36,9 @@
 import numpy as np
 from scipy.sparse import issparse
 
-from .experiment import Experiment
+from . import Experiment
+from ._doc import ds
 from .util import _to_list
-from .doc_init import ds
 
 logger = getLogger(__name__)
 
@@ -514,7 +514,9 @@ def filter_mean(exp: Experiment, cutoff=0.01, **kwargs):
 
 @Experiment._record_sig
 def filter_ids(exp: Experiment, ids, axis=1, negate=False, inplace=False):
-    '''Filter samples or features based on a list index values
+    '''Filter samples or features based on a list of IDs.
+
+    .. note:: the samples or features are reordered to match the order given in ``ids``.
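+
+    For example (with hypothetical feature IDs), ``exp.filter_ids(['b', 'a'])``
+    keeps only the features 'b' and 'a', in that order, even if 'a' appears
+    before 'b' in ``exp``.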
 
     Parameters
     ----------
diff --git a/calour/heatmap/heatmap.py b/calour/heatmap/heatmap.py
index d0ffcca4..31979524 100644
--- a/calour/heatmap/heatmap.py
+++ b/calour/heatmap/heatmap.py
@@ -16,9 +16,9 @@
 
 from .. import Experiment
 from ..database import _get_database_class
-from .._dendrogram import plot_tree
 from ..util import _to_list, _transition_index
-from ..doc_init import ds
+from .._doc import ds
+from .._dendrogram import plot_tree
 
 logger = getLogger(__name__)
diff --git a/calour/heatmap/plotgui.py b/calour/heatmap/plotgui.py
index fc2462c7..31d2340e 100644
--- a/calour/heatmap/plotgui.py
+++ b/calour/heatmap/plotgui.py
@@ -12,7 +12,7 @@
 import numpy as np
 from matplotlib.gridspec import GridSpec
 
-from ..doc_init import ds
+from .._doc import ds
 
 logger = getLogger(__name__)
diff --git a/calour/heatmap/plotgui_cli.py b/calour/heatmap/plotgui_cli.py
index 089866ee..d345a0ef 100644
--- a/calour/heatmap/plotgui_cli.py
+++ b/calour/heatmap/plotgui_cli.py
@@ -7,7 +7,7 @@
 # ----------------------------------------------------------------------------
 
 from .plotgui import PlotGUI
-from ..doc_init import ds
+from .._doc import ds
 
 
 class PlotGUI_CLI(PlotGUI):
diff --git a/calour/heatmap/plotgui_jupyter.py b/calour/heatmap/plotgui_jupyter.py
index d0dad7aa..211843f8 100644
--- a/calour/heatmap/plotgui_jupyter.py
+++ b/calour/heatmap/plotgui_jupyter.py
@@ -5,12 +5,13 @@
 import matplotlib
 
 from .plotgui import PlotGUI
-from ..doc_init import ds
+from .._doc import ds
 
 logger = getLogger(__name__)
 
 
+@ds.with_indent(4)
 class PlotGUI_Jupyter(PlotGUI):
     '''Jupyter GUI of plotting.
 
@@ -19,6 +20,7 @@ class PlotGUI_Jupyter(PlotGUI):
 
     Parameters
     ----------
+    %(PlotGUI.parameters)s
     '''
 
     @ds.with_indent(8)
diff --git a/calour/heatmap/plotgui_qt5.py b/calour/heatmap/plotgui_qt5.py
index dbbe0425..a426b34e 100644
--- a/calour/heatmap/plotgui_qt5.py
+++ b/calour/heatmap/plotgui_qt5.py
@@ -11,7 +11,7 @@
 from PyQt5.QtCore import Qt
 
 from .plotgui import PlotGUI
-from ..doc_init import ds
+from .._doc import ds
 
 logger = getLogger(__name__)
diff --git a/calour/io.py b/calour/io.py
index 05ead062..0e79b9df 100644
--- a/calour/io.py
+++ b/calour/io.py
@@ -32,12 +32,9 @@
 import pandas as pd
 import biom
 
-from .experiment import Experiment
-from .amplicon_experiment import AmpliconExperiment
-from .ms1_experiment import MS1Experiment
+from . import Experiment, AmpliconExperiment, MS1Experiment
 from .util import get_file_md5, get_data_md5, _get_taxonomy_string
-
-from .doc_init import ds
+from ._doc import ds
 
 logger = getLogger(__name__)
diff --git a/calour/manipulation.py b/calour/manipulation.py
index 018c98b8..4bfbacb2 100644
--- a/calour/manipulation.py
+++ b/calour/manipulation.py
@@ -267,3 +267,50 @@ def join_experiments(exp: Experiment, other, field_name='experiments', prefixes=
     newexp.data = all_data
 
     return newexp
+
+
+@Experiment._record_sig
+def join_experiments_featurewise(exp: Experiment, other,
+                                 field_name='_feature_origin_', origin_labels=('exp1', 'exp2')):
+    '''Combine two :class:`.Experiment` objects into one.
+
+    A typical use case is to combine the 16S and ITS amplicon data measured on the same samples.
+
+    .. warning:: If a sample has features in only one of the two
+       Experiment objects, it will be dropped from the join.
+
+    Parameters
+    ----------
+    other : :class:`.Experiment`
+        The ``Experiment`` object to combine with the current one. If
+        both experiments contain the same feature metadata column and
+        there is a conflict between the two, the value will be taken
+        from ``exp`` and not from ``other``.
+    field_name : ``None`` or str (optional)
+        Name of the new ``feature_metadata`` field indicating which experiment
+        each feature comes from. If it is None, no such column is added.
+    origin_labels : tuple of (str, str) (optional)
+        The labels marking which experiment each feature originated from.
+
+    Returns
+    -------
+    :class:`.Experiment`
+        A new experiment with the features from both experiments concatenated
+        over the samples shared between the two.
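+
+    Examples
+    --------
+    A minimal sketch (``exp_16s`` and ``exp_its`` are hypothetical
+    experiments that share sample IDs):
+
+    >>> combined = exp_16s.join_experiments_featurewise(
+    ...     exp_its, field_name='origin', origin_labels=('16S', 'ITS'))  # doctest: +SKIP
+    >>> combined.shape[1] == exp_16s.shape[1] + exp_its.shape[1]  # doctest: +SKIP
+    True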
+
+    '''
+    logger.debug('Join 2 experiments featurewise:\n{!r}\n{!r}'.format(exp, other))
+    # create an empty object
+    newexp = exp.__class__(np.empty(shape=[0, 0]), pd.DataFrame(),
+                           description='join %s & %s' % (exp.description, other.description))
+    # keep only the samples present in both experiments
+    sid = exp.sample_metadata.index.intersection(other.sample_metadata.index)
+    exp = exp.filter_ids(sid, axis=0)
+    other = other.filter_ids(sid, axis=0)
+    fmd = pd.concat([exp.feature_metadata, other.feature_metadata], join='outer')
+    if field_name is not None:
+        fmd[field_name] = [origin_labels[0]] * exp.shape[1] + [origin_labels[1]] * other.shape[1]
+    newexp.sample_metadata = exp.sample_metadata
+    newexp.feature_metadata = fmd
+    # merge data table
+    newexp.data = np.c_[exp.data, other.data]
+
+    return newexp
diff --git a/calour/plotting.py b/calour/plotting.py
index 9b19f305..691573b2 100644
--- a/calour/plotting.py
+++ b/calour/plotting.py
@@ -13,8 +13,9 @@
    plot_enrichment
    plot_diff_abundance_enrichment
    plot_stacked_bar
-   plot_shareness
+   plot_core_features
    plot_abund_prevalence
+   plot_scatter_matrix
 '''
 
 # ----------------------------------------------------------------------------
@@ -217,8 +218,8 @@ def plot_diff_abundance_enrichment(exp: Experiment, term_type='term', max_show=1
     return ax2, newexp
 
 
-def plot_shareness(exp: Experiment, field=None, steps=None, iterations=10, alpha=0.5, linewidth=0.7, ax=None):
-    '''Plot the number of shared features against the number of samples.
+def plot_core_features(exp: Experiment, field=None, steps=None, cutoff=2, frac=0.9, iterations=10, alpha=0.5, linewidth=0.7, ax=None):
+    '''Plot the percentage of core features shared across increasing numbers of samples.
 
     To see if there is a core feature set shared across most of the samples.
 
@@ -237,7 +238,12 @@
     field : str
         sample metadata field to group samples
     steps : iterable of int
-        the sizes of subsamples to compute the shareness.
+        the sizes of subsamples to compute the fraction of core features.
+    cutoff : numeric
+        a feature is considered present in a sample only if its abundance is >= ``cutoff``.
+    frac : numeric
+        Must be between 0 and 1. A feature is considered a core feature
+        if it is present in at least ``frac`` fraction of the samples.
     iterations : int
         repeat the subsampling multiple times and plot all the iterations
     alpha : float
@@ -251,7 +257,6 @@
     Returns
     -------
     matplotlib.axes.Axes
         The Axes object containing the plot.
-
     '''
     if ax is None:
         from matplotlib import pyplot as plt
         fig, ax = plt.subplots()
@@ -264,15 +269,16 @@
     def plot_lines(data, steps, label):
         y_sum = np.zeros(len(steps))
         for i in range(iterations):
-            x, y = _compute_frac_nonzero(data, steps)
+            y = _compute_frac_nonzero(data, steps, cutoff, frac, i)
+            y = y * 100
             y_sum += y
             if i == 0:
-                line, = ax.plot(x, y, alpha=alpha, linewidth=linewidth)
+                line, = ax.plot(steps, y, alpha=alpha, linewidth=linewidth)
             else:
-                ax.plot(x, y, alpha=alpha, linewidth=linewidth, color=line.get_color())
+                ax.plot(steps, y, alpha=alpha, linewidth=linewidth, color=line.get_color())
         y_ave = y_sum / iterations
         # plot average of the iterations
-        ax.plot(x, y_ave, linewidth=linewidth * 3, label=label, color=line.get_color())
+        ax.plot(steps, y_ave, linewidth=linewidth * 3, label=label, color=line.get_color())
 
     if field is None:
         plot_lines(exp.data, steps, label='all samples')
@@ -286,11 +292,11 @@
-    # because the shareness drops quickly, we plot it in log scale
+    # because the fraction of core features drops quickly, we plot it in log scale
     ax.set_xscale('log')
     ax.set_xlabel('sample number')
-    ax.set_ylabel('fraction of shared features')
+    ax.set_ylabel('shared features (%)')
     return ax
 
 
-def _compute_frac_nonzero(data, steps):
+def _compute_frac_nonzero(data, steps, cutoff=2, frac=0.9, random_state=None):
     '''Iteratively compute the fraction of non-zeros in each column after subsampling rows.
 
@@ -298,27 +304,43 @@ def _compute_frac_nonzero(data, steps):
     Parameters
     ----------
     data : 2-d array of numeric
         sample in row and feature in column
     steps : iterable of int
-        the sizes of subsamples
+        the subsample sizes (should be in descending order)
+    cutoff : numeric
+        a feature is considered present in a sample only if its abundance is >= ``cutoff``.
+    frac : numeric
+        Must be between 0 and 1. A feature is considered a core feature
+        if it is present in at least ``frac`` fraction of the samples.
+    random_state : int, RandomState instance or None, optional, default=None
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by `np.random`.
 
     Returns
    -------
-    tuple of 2 lists
-        steps and fractions
+    numpy.array
+        fractions of core features for each subsample size
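+
+    Examples
+    --------
+    A small sanity check (with a recent numpy): three samples and two
+    features, all abundances above the cutoff, so every feature is a
+    core feature at every subsample size:
+
+    >>> import numpy as np
+    >>> _compute_frac_nonzero(np.full((3, 2), 5), [3, 2], cutoff=2, frac=0.9, random_state=0)
+    array([1., 1.])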
     '''
     n_samples, n_features = data.shape
-
-    shared = []
-    for i in steps:
-        data = data[np.random.choice(n_samples, i, replace=False), :]
-        x = data > 0
+    shared = np.zeros(len(steps))
+    rand = np.random.RandomState(random_state)
+    if cutoff <= 0:
+        raise ValueError('You need to provide a positive value for `cutoff`: %r' % cutoff)
+    if frac <= 0 or frac > 1:
+        raise ValueError('You need to provide a value in the interval (0, 1] for `frac`: %r' % frac)
+    for n, i in enumerate(steps):
+        data = data[rand.choice(n_samples, i, replace=False), :]
+        x = data >= cutoff
         # the count of samples that have the given feature
         counts = x.sum(axis=0)
-        all_presence = np.sum(counts == i)
+        all_presence = np.sum(counts >= np.ceil(i * frac))
         all_absence = np.sum(counts == 0)
         # important: remove the features that are all zeros across the subset of samples
-        shared.append(all_presence / (n_features - all_absence))
+        shared[n] = all_presence / (n_features - all_absence)
+        # don't forget to update the sample count after subsampling
         n_samples = data.shape[0]
-    return steps, shared
+    return shared
 
 
 def plot_abund_prevalence(exp: Experiment, field, log=True, min_abund=0.01, alpha=0.5, linewidth=0.7, ax=None):
diff --git a/calour/sorting.py b/calour/sorting.py
index 0f5ccf3b..9b48b83f 100644
--- a/calour/sorting.py
+++ b/calour/sorting.py
@@ -37,7 +37,7 @@
 from . import Experiment
 from .transforming import log_n, transform, scale
 from .util import _argsort
-from .doc_init import ds
+from ._doc import ds
 
 logger = getLogger(__name__)
diff --git a/calour/tests/test_manipulation.py b/calour/tests/test_manipulation.py
index cd8ef8ce..b94e23e7 100644
--- a/calour/tests/test_manipulation.py
+++ b/calour/tests/test_manipulation.py
@@ -10,9 +10,9 @@
 from copy import deepcopy
 
 import numpy as np
+import pandas as pd
 
 import calour as ca
-
 from calour._testing import Tests, assert_experiment_equal
 
 
@@ -50,6 +50,29 @@ def test_join_experiments(self):
         fexp = newexp.filter_samples('experiments', ['t2'])
         assert_experiment_equal(fexp, texp, ignore_md_fields=['experiments'])
 
+    def test_join_experiments_featurewise(self):
+        otu1 = ca.Experiment(np.array([[0, 9], [7, 4]]), sparse=False,
+                             sample_metadata=pd.DataFrame({'category': ['B', 'A'],
+                                                           'ph': [7.7, 6.6]},
+                                                          index=['s2', 's1']),
+                             feature_metadata=pd.DataFrame({'motile': ['y', 'n']}, index=['16S1', '16S2']))
+        otu2 = ca.Experiment(np.array([[6], [8], [10]]), sparse=False,
+                             sample_metadata=pd.DataFrame({'category': ['A', 'B', 'C'],
+                                                           'ph': [6.6, 7.7, 8.8]},
+                                                          index=['s1', 's2', 's3']),
+                             feature_metadata=pd.DataFrame({'motile': [None]}, index=['ITS1']))
+        combined_obs = otu1.join_experiments_featurewise(otu2, 'origin', ('16S', 'ITS'))
+        combined_exp = ca.Experiment(np.array([[7, 4, 6], [0, 9, 8]]), sparse=False,
+                                     sample_metadata=pd.DataFrame({'category': ['A', 'B'],
+                                                                   'ph': [6.6, 7.7]},
+                                                                  index=['s1', 's2']),
+                                     feature_metadata=pd.DataFrame({'motile': ['y', 'n', None],
+                                                                    'origin': ['16S', '16S', 'ITS']},
+                                                                   index=['16S1', '16S2', 'ITS1']))
+        # reorder the samples before comparing
+        combined_obs = combined_obs.filter_ids(combined_exp.sample_metadata.index, axis=0)
+        assert_experiment_equal(combined_obs, combined_exp)
+
     def test_agg_by_metadata(self):
         # test default conditions - on samples, not inplace, mean method
         newexp = self.test1.aggregate_by_metadata('group')
diff --git a/calour/tests/test_plotting.py b/calour/tests/test_plotting.py
index dc1775c7..41d897f4 100644
--- a/calour/tests/test_plotting.py
+++ b/calour/tests/test_plotting.py
@@ -81,12 +81,12 @@ def test_plot_abund_prevalence(self):
             assert_array_almost_equal(np.array([[i, j] for i, j in zip(x, y)]),
                                       lines[1].get_xydata())
 
-    def test_plot_shareness(self):
+    def test_plot_core_features(self):
        np.random.seed(12345)
        self.test1 = ca.read(self.test1_biom, self.test1_samp, self.test1_feat, normalize=100)
        self.test1.sparse = False
        ax = self.test1.filter_samples(
-            'group', ['1', '2']).plot_shareness(
+            'group', ['1', '2']).plot_core_features(
                field='group', steps=(2, 12), iterations=2)
        lines = ax.get_lines()
        self.assertEqual(len(lines), 6)
@@ -97,11 +97,16 @@ def test_compute_frac_nonzero(self):
                         [4, 0, 4, 3, 0, 2, 0, 5],
                         [2, 4, 0, 4, 2, 0, 1, 0],
                         [3, 3, 5, 3, 1, 0, 0, 1]])
-        np.random.seed(1)
-        steps, frac = _compute_frac_nonzero(data, [5, 3, 2])
-        self.assertListEqual(steps, [5, 3, 2])
+
+        frac = _compute_frac_nonzero(data, [5, 3, 2], cutoff=0.1, frac=1, random_state=1)
        assert_array_almost_equal(frac, np.array([0, 0.25, 4/7]))
 
+        frac = _compute_frac_nonzero(data, [5, 3, 2], cutoff=0.1, frac=0.00001, random_state=1)
+        assert_array_almost_equal(frac, np.array([1, 1, 1]))
+
+        frac = _compute_frac_nonzero(data, [5, 3, 2], cutoff=5, frac=1, random_state=1)
+        assert_array_almost_equal(frac, np.array([0, 0, 0]))
+
     def test_plot_scatter_matrix(self):
        self.test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat, normalize=100)
        fids = ['AA', 'AT', 'AG', 'AC']
diff --git a/calour/tests/test_training.py b/calour/tests/test_training.py
index f4ada4f1..32ecae58 100644
--- a/calour/tests/test_training.py
+++ b/calour/tests/test_training.py
@@ -20,21 +20,29 @@ def setUp(self):
         self.test2_sparse = ca.read(self.test2_biom, self.test2_samp, self.test2_feat, normalize=None)
         self.test2_dense = ca.read(self.test2_biom, self.test2_samp, self.test2_feat, sparse=False, normalize=None)
 
-    def test_onehot_encode_features(self):
-        new = self.test2_sparse.onehot_encode_features(['categorical'])
+    def test_add_sample_metadata_as_features(self):
+        new = self.test2_sparse.add_sample_metadata_as_features(['categorical'])
         dat = new.data.toarray()
         assert_array_equal(dat[:, 0:3], [[1, 0, 0], [0, 1, 0], [0, 0, 1]] * 3)
         self.assertListEqual(new.feature_metadata.index[:3].tolist(),
                              ['categorical=A', 'categorical=B', 'categorical=C'])
 
-    def test_onehot_encode_features_dense(self):
-        new = self.test2_dense.onehot_encode_features(['categorical'])
+    def test_add_sample_metadata_as_features_dense(self):
+        new = self.test2_dense.add_sample_metadata_as_features(['categorical'])
         assert_array_equal(new.data[:, 0:3], [[1, 0, 0], [0, 1, 0], [0, 0, 1]] * 3)
         self.assertListEqual(new.feature_metadata.index[:3].tolist(),
                              ['categorical=A', 'categorical=B', 'categorical=C'])
 
+    def test_split_train_test(self):
+        train_X, test_X, train_y, test_y = self.test2_dense.split_train_test(
+            test_size=3, field='group', stratify='categorical', random_state=7)
+        self.assertListEqual(test_y.tolist(), [1, 2, 1])
+        self.assertListEqual(test_y.index.tolist(), ['S3', 'S8', 'S1'])
+        self.assertListEqual(train_y.tolist(), [2, 1, 1, 1, 1, 1])
+        self.assertListEqual(train_y.index.tolist(), ['S9', 'S6', 'S5', 'S2', 'S4', 'S7'])
+
 
 if __name__ == "__main__":
     main()
diff --git a/calour/training.py b/calour/training.py
index f26775e8..c6ee9742 100644
--- a/calour/training.py
+++ b/calour/training.py
@@ -11,29 +11,36 @@
 .. autosummary::
    :toctree: generated
 
-   onehot_encode_features
+   add_sample_metadata_as_features
 '''
 
 from logging import getLogger
 
 from sklearn.feature_extraction import DictVectorizer
+from sklearn.model_selection import train_test_split
 from scipy.sparse import hstack
 import pandas as pd
 import numpy as np
 
 from .experiment import Experiment
+from .amplicon_experiment import AmpliconExperiment
 
 logger = getLogger(__name__)
 
 
 @Experiment._record_sig
-def onehot_encode_features(exp: Experiment, fields, sparse=None, inplace=False):
+def add_sample_metadata_as_features(exp: Experiment, fields, sparse=None, inplace=False):
     '''Add covariates from sample metadata to the data table as features for machine learning.
 
-    This will convert the columns of categorical strings using one-hot encoding scheme and add them
-    into the data table as new features.
+    This will convert the columns of categorical strings using a one-hot
+    encoding scheme and add them into the data table as new features.
+
+    .. note:: This is only for numeric and/or nominal covariates in
+       sample metadata. If you want to add an ordinal column as a feature,
+       use `pandas.Series.map` to convert it to a numeric column first.
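+
+       For example (with a hypothetical ordinal column 'size'),
+       ``exp.sample_metadata['size'].map({'small': 1, 'medium': 2, 'large': 3})``
+       gives a numeric column that can then be added as a feature.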
 
     Examples
     --------
@@ -47,7 +54,7 @@ def onehot_encode_features(exp: Experiment, fields, sparse=None, inplace=False):
 
     Let's add the columns of `category` and `ph` as features into the data table:
 
-    >>> new = exp.onehot_encode_features(['category', 'ph'])
+    >>> new = exp.add_sample_metadata_as_features(['category', 'ph'])
     >>> new
     Experiment with 2 samples, 5 features
     >>> new.feature_metadata
@@ -89,12 +96,55 @@
     md = new.sample_metadata[fields]
     if sparse is None:
         sparse = new.sparse
+
     vec = DictVectorizer(sparse=sparse)
     encoded = vec.fit_transform(md.to_dict(orient='records'))
+
     if sparse:
-        new.data = hstack((encoded, new.data))
+        new.data = hstack((encoded, new.data), format='csr')
     else:
         new.data = np.concatenate([encoded, new.data], axis=1)
     # the order in the concatenation should be consistent with the data table
     new.feature_metadata = pd.concat([pd.DataFrame(index=vec.get_feature_names()), new.feature_metadata])
 
     return new
+
+
+def split_train_test(exp: Experiment, field, test_size, train_size=None, stratify=None, random_state=None):
+    '''Split the experiment into train and test sets.
+
+    Parameters
+    ----------
+    field : str
+        the sample metadata column to use as the prediction target ``y``
+    test_size, train_size, stratify, random_state :
+        passed through to :func:`sklearn.model_selection.train_test_split`.
+        ``stratify`` may also be a sample metadata column name, in which
+        case that column is used for stratification.
+
+    Returns
+    -------
+    tuple of (train_X, test_X, train_y, test_y)
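+
+    Examples
+    --------
+    A minimal sketch (assuming ``exp`` has a sample metadata column
+    'group' to predict and a 'categorical' column to stratify on):
+
+    >>> train_X, test_X, train_y, test_y = exp.split_train_test(
+    ...     field='group', test_size=3, stratify='categorical',
+    ...     random_state=7)  # doctest: +SKIP
+    >>> test_X.shape[0], len(test_y)  # doctest: +SKIP
+    (3, 3)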
+    '''
+    if isinstance(stratify, str):
+        # a column name was given; stratify on that sample metadata column
+        stratify = exp.sample_metadata[stratify]
+    y = exp.sample_metadata[field]
+    train_X, test_X, train_y, test_y = train_test_split(
+        exp.data, y, test_size=test_size, train_size=train_size, stratify=stratify, random_state=random_state)
+    return train_X, test_X, train_y, test_y
+
+
+@Experiment._record_sig
+def collect_cv_prediction(exp: Experiment, field, estimator, cv, scoring, inplace=False):
+    '''Collect predictions across cross-validation folds.
+
+    .. note:: Not implemented yet; the commented-out code below is only a sketch.
+
+    '''
+    # from sklearn.model_selection._split import check_cv
+    # from sklearn.base import is_classifier
+    # if inplace:
+    #     new = exp
+    # else:
+    #     new = exp.copy()
+    # cv = check_cv(cv, y, classifier=is_classifier(estimator))
+    # for params in paramgrid:
+    #     for train_x, train_y in cv:
+    #         estimator.fit(train_x, train_y)
+    # return yobs, yhat, model
+
+
+@Experiment._record_sig
+def learning_curve_depths(exp: AmpliconExperiment, field, groups=None,
+                          train_depths=np.array([0.1, 0.325, 0.55, 0.775, 1.]),
+                          cv=None, scoring=None, exploit_incremental_learning=False,
+                          n_jobs=1, pre_dispatch='all', verbose=0, shuffle=False,
+                          random_state=None):
+    '''Compute the learning curve with regard to sequencing depths (not implemented yet).'''
diff --git a/calour/transforming.py b/calour/transforming.py
index 9ac96741..6455a885 100644
--- a/calour/transforming.py
+++ b/calour/transforming.py
@@ -40,8 +40,8 @@
 from skbio.stats.composition import clr, centralize as skbio_centralize
 from skbio.stats import subsample_counts
 
-from .experiment import Experiment
-from .doc_init import ds
+from . import Experiment
+from ._doc import ds
 
 logger = getLogger(__name__)
 
@@ -305,6 +305,7 @@ def random_permute_data(exp: Experiment, normalize=True):
     -------
     Experiment
         With each feature shuffled independently
+
     '''
     newexp = exp.copy()
     newexp.sparse = False
diff --git a/calour/util.py b/calour/util.py
index f8758664..431c14a7 100644
--- a/calour/util.py
+++ b/calour/util.py
@@ -467,4 +467,7 @@ def register_functions(cls, modules=None):
                    '\n    exp : {0}'
                    '\n        Input experiment object.')
 
+        # some functions have no docstring; give them an empty one so the
+        # substitution below does not fail on None
+        if not f.__doc__:
+            f.__doc__ = ''
+
         f.__doc__ = p.sub(updated.format(cls.__name__, fn), f.__doc__)