Add adjusted q-value to return from multiple hypothesis testing (#203)

Add q-value (corrected p-values) to dsFDR (in diff_abundance() and correlation())
biocore · Jul 6, 2020 · e128102 · e128102
1 parent 23059d0
commit e128102
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 31 deletions.
diff --git a/calour/analysis.py b/calour/analysis.py
@@ -37,11 +37,12 @@
 
 _CALOUR_STAT = '_calour_stat'
 _CALOUR_PVAL = '_calour_pval'
+_CALOUR_QVAL = '_calour_qval'
 _CALOUR_DIRECTION = '_calour_direction'
 
 
 @Experiment._record_sig
-@format_docstring(_CALOUR_PVAL, _CALOUR_STAT)
+@format_docstring(_CALOUR_PVAL, _CALOUR_QVAL, _CALOUR_STAT, _CALOUR_DIRECTION)
 def correlation(exp: Experiment, field, method='spearman', nonzero=False, transform=None, numperm=1000, alpha=0.1, fdr_method='dsfdr', random_seed=None):
     '''Find features with correlation to a numeric metadata field.
 
@@ -91,13 +92,14 @@ def correlation(exp: Experiment, field, method='spearman', nonzero=False, transf
     Experiment
         The experiment with only correlated features, sorted according to correlation coefficient.
 
-        * '{}' : the FDR adjusted p-values for each feature
+        * '{}' : the non-adjusted p-values for each feature
+        * '{}' : the FDR-adjusted q-values for each feature
         * '{}' : the statistics (correlation coefficient if
           the `method` is 'spearman' or 'pearson'). If it
           is larger than zero for a given feature, it indicates this
           feature is positively correlated with the sample metadata;
           otherwise, negatively correlated.
-
+        * '{}' : in which of the 2 sample groups this given feature is increased.
     '''
     if field not in exp.sample_metadata.columns:
         raise ValueError('Field %s not in sample_metadata. Possible fields are: %s' % (field, exp.sample_metadata.columns))
@@ -127,16 +129,16 @@ def correlation(exp: Experiment, field, method='spearman', nonzero=False, transf
         else:
             raise ValueError('Cannot use nonzero=True on methods except "pearson" or "spearman"')
     # find the significant features
-    keep, odif, pvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
+    keep, odif, pvals, qvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
     logger.info('Positive correlated features : %d. Negative correlated features : %d. total %d'
                 % (np.sum(odif[keep] > 0), np.sum(odif[keep] < 0), np.sum(keep)))
-    newexp = _new_experiment_from_pvals(cexp, exp, keep, odif, pvals)
+    newexp = _new_experiment_from_pvals(cexp, exp, keep, odif, pvals, qvals)
     newexp.feature_metadata[_CALOUR_DIRECTION] = [field if x > 0 else 'Anti-%s' % field for x in newexp.feature_metadata[_CALOUR_STAT]]
     return newexp
 
 
 @Experiment._record_sig
-@format_docstring(_CALOUR_PVAL, _CALOUR_STAT, _CALOUR_DIRECTION)
+@format_docstring(_CALOUR_PVAL, _CALOUR_QVAL, _CALOUR_STAT, _CALOUR_DIRECTION)
 def diff_abundance(exp: Experiment, field, val1, val2=None, method='meandiff', transform='rankdata', numperm=1000, alpha=0.1, fdr_method='dsfdr', random_seed=None):
     '''Differential abundance test between 2 groups of samples for all the features.
 
@@ -195,7 +197,8 @@ def diff_abundance(exp: Experiment, field, val1, val2=None, method='meandiff', t
         according to their effect size.  The new experiment contains
         additional ``feature_metadata`` fields that include:
 
-        * '{}' : the FDR adjusted p-values for each feature
+        * '{}' : the non-adjusted p-values for each feature
+        * '{}' : the FDR-adjusted q-values for each feature
         * '{}' : the effect size (t-statistic). If it is larger than
           zero for a given feature, it indicates this feature is
           increased in the first group of samples (``val1``); if
@@ -231,10 +234,10 @@ def diff_abundance(exp: Experiment, field, val1, val2=None, method='meandiff', t
     labels = np.zeros(len(cexp.sample_metadata))
     labels[cexp.sample_metadata[field].isin(val1).values] = 1
     logger.info('%d samples with value 1 (%s)' % (np.sum(labels), val1))
-    keep, odif, pvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
+    keep, odif, pvals, qvals = dsfdr.dsfdr(data, labels, method=method, transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
     logger.info('number of higher in {}: {}. number of higher in {} : {}. total {}'.format(
         grp1, np.sum(odif[keep] > 0), grp2, np.sum(odif[keep] < 0), np.sum(keep)))
-    newexp = _new_experiment_from_pvals(cexp, exp, keep, odif, pvals)
+    newexp = _new_experiment_from_pvals(cexp, exp, keep, odif, pvals, qvals)
     newexp.feature_metadata[_CALOUR_DIRECTION] = [grp1 if x > 0 else grp2 for x in newexp.feature_metadata[_CALOUR_STAT]]
     return newexp
 
@@ -296,13 +299,13 @@ def diff_abundance_kw(exp: Experiment, field, transform='rankdata', numperm=1000
     for idx, clabel in enumerate(exp.sample_metadata[field].unique()):
         labels[exp.sample_metadata[field].values == clabel] = idx
     logger.debug('Found %d unique sample labels' % (idx + 1))
-    keep, odif, pvals = dsfdr.dsfdr(data, labels, method='kruwallis', transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
+    keep, odif, pvals, qvals = dsfdr.dsfdr(data, labels, method='kruwallis', transform_type=transform, alpha=alpha, numperm=numperm, fdr_method=fdr_method)
 
     logger.info('Found %d significant features' % (np.sum(keep)))
-    return _new_experiment_from_pvals(cexp, exp, keep, odif, pvals)
+    return _new_experiment_from_pvals(cexp, exp, keep, odif, pvals, qvals)
 
 
-def _new_experiment_from_pvals(cexp, exp, keep, odif, pvals):
+def _new_experiment_from_pvals(cexp, exp, keep, odif, pvals, qvals):
     '''Combine the pvalues and effect size into a new experiment.
 
     Keep only the significant features, sort the features by the effect size
@@ -317,8 +320,10 @@ def _new_experiment_from_pvals(cexp, exp, keep, odif, pvals):
         One entry per exp feature. True for the features which are significant (following FDR correction)
     odif : np.array of float
         One entry per exp feature. The effect size per feature (can be positive or negative)
-    pval : np.array of float
+    pvals : np.array of float
         One entry per exp feature. The p-value associated with each feature (not FDR corrected)
+    qvals: np.array of float
+        One entry per exp feature. The q-value associated with each feature (FDR corrected)
 
     Returns
     -------
@@ -335,10 +340,12 @@ def _new_experiment_from_pvals(cexp, exp, keep, odif, pvals):
         newexp = exp.filter_ids(newexp.feature_metadata.index.values)
     odif = odif[keep[0]]
     pvals = pvals[keep[0]]
+    qvals = qvals[keep[0]]
     si = np.argsort(odif, kind='mergesort')
     odif = odif[si]
     pvals = pvals[si]
     newexp = newexp.reorder(si, axis=1)
     newexp.feature_metadata[_CALOUR_STAT] = odif
     newexp.feature_metadata[_CALOUR_PVAL] = pvals
+    newexp.feature_metadata[_CALOUR_QVAL] = qvals
     return newexp
diff --git a/calour/dsfdr.py b/calour/dsfdr.py
@@ -103,7 +103,7 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
 
     input:
     data : N x S numpy array
-        each column is a sample (S total), each row an OTU (N total)
+        each column is a sample (S total), each row a feature (N total)
     labels : a 1d numpy array (length S)
         the labels of each sample (same order as data) with the group
         (0/1 if binary, 0-G-1 if G groups, or numeric values for correlation)
@@ -112,8 +112,8 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
     transform_type : str or None
         transformation to apply to the data before caluculating
         the test statistic
-        'rankdata' : rank transfrom each OTU reads
-        'log2data' : calculate log2 for each OTU using minimal cutoff of 2
+        'rankdata' : rank transfrom each feature
+        'log2data' : calculate log2 for each feature using minimal cutoff of 2
         'normdata' : normalize the data to constant sum per samples
         'binarydata' : convert to binary absence/presence
          None : no transformation to perform
@@ -146,11 +146,13 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
 
     output:
     reject : np array of bool (length N)
-        True for OTUs where the null hypothesis is rejected
+        True for features where the null hypothesis is rejected
     tstat : np array of float (length N)
-        the test statistic value for each OTU (for effect size)
+        the test statistic value for each feature (for effect size)
     pvals : np array of float (length N)
-        the p-value for each OTU
+        the p-value (uncorrected) for each feature
+    qvals: np array of float (length N)
+        the q-value (corrected p-value) for each feature.
     '''
 
     logger.debug('dsfdr using fdr method: %s' % fdr_method)
@@ -304,8 +306,7 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
             u[:, cperm] = rt
         u = np.abs(u)
     else:
-        print('unsupported method %s' % method)
-        return None, None
+        raise ValueError('unsupported method %s' % method)
 
     # fix floating point errors (important for permutation values!)
     # https://github.com/numpy/numpy/issues/8116
@@ -315,6 +316,7 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
 
     # calculate permutation p-vals
     pvals = np.zeros([numbact])  # p-value for original test statistic t
+    qvals = np.ones([numbact])  # q-value (corrected p-value) for each feature.
     pvals_u = np.zeros([numbact, numperm])
     # pseudo p-values for permutated test statistic u
     for crow in range(numbact):
@@ -342,30 +344,37 @@ def dsfdr(data, labels, transform_type='rankdata', method='meandiff',
             allfdr.append(fdr)
             allt.append(cp)
             if fdr <= alpha:
-                realcp = cp
-                foundit = True
-                break
+                if not foundit:
+                    realcp = cp
+                    foundit = True
 
         if not foundit:
             # no good threshold was found
             reject = np.repeat([False], numbact)
-            return reject, tstat, pvals
+            return reject, tstat, pvals, qvals
 
         # fill the reject null hypothesis
         reject = np.zeros(numbact, dtype=int)
         reject = (pvals <= realcp)
 
+        # fill the q-values
+        for idx, cfdr in enumerate(allfdr):
+            # fix for qval > 1 (since we count on all features in random permutation)
+            cfdr = np.min([cfdr, 1])
+            cpval = allt[idx]
+            qvals[pvals == cpval] = cfdr
+
     elif fdr_method == 'bhfdr' or fdr_method == 'filterBH':
         t_star = np.array([t, ] * numperm).transpose()
         pvals = (np.sum(u >= t_star, axis=1) + 1) / (numperm + 1)
-        reject = multipletests(pvals, alpha=alpha, method='fdr_bh')[0]
+        reject, qvals, *_ = multipletests(pvals, alpha=alpha, method='fdr_bh')
 
     elif fdr_method == 'byfdr':
         t_star = np.array([t, ] * numperm).transpose()
         pvals = (np.sum(u >= t_star, axis=1) + 1) / (numperm + 1)
-        reject = multipletests(pvals, alpha=alpha, method='fdr_by')[0]
+        reject, qvals, *_ = multipletests(pvals, alpha=alpha, method='fdr_by')
 
     else:
         raise ValueError('fdr method %s not supported' % fdr_method)
 
-    return reject, tstat, pvals
+    return reject, tstat, pvals, qvals
diff --git a/calour/tests/test_dsfdr.py b/calour/tests/test_dsfdr.py
@@ -126,19 +126,21 @@ def test_dsfdr(self):
         res_ds = dsfdr.dsfdr(self.data, self.labels, method='meandiff',
                              transform_type=None, alpha=0.1, numperm=1000,
                              fdr_method='dsfdr')
-        self.assertEqual(np.shape(res_ds)[0], self.data.shape[0])
+        self.assertEqual(np.shape(res_ds[0])[0], self.data.shape[0])
         np.testing.assert_array_equal(res_ds[0], [True, False, False])
+        # test the qvals behave logically
+        self.assertEqual(np.sum(res_ds[3] <= 0.1), np.sum(res_ds[0]))
 
         res_bh = dsfdr.dsfdr(self.data, self.labels, method='meandiff',
                              transform_type=None, alpha=0.1, numperm=1000,
                              fdr_method='bhfdr')
-        self.assertEqual(np.shape(res_bh)[0], self.data.shape[0])
+        self.assertEqual(np.shape(res_bh[0])[0], self.data.shape[0])
         np.testing.assert_array_equal(res_bh[0], [True, False, False])
 
         res_by = dsfdr.dsfdr(self.data, self.labels, method='meandiff',
                              transform_type=None, alpha=0.1, numperm=1000,
                              fdr_method='byfdr')
-        self.assertEqual(np.shape(res_by)[0], self.data.shape[0])
+        self.assertEqual(np.shape(res_by[0])[0], self.data.shape[0])
         np.testing.assert_array_equal(res_by[0], [True, False, False])
 
         # test on simulated self.data_sim
@@ -148,6 +150,8 @@ def test_dsfdr(self):
                               alpha=0.1, numperm=1000, fdr_method='dsfdr')[0]
         fdr_ds2 = (np.sum(np.where(res_ds2)[0] >= 100)) / np.sum(res_ds2)
         np.testing.assert_equal(fdr_ds2 <= 0.1, True)
+        # test the qvals behave logically
+        self.assertEqual(np.sum(res_ds2[3] <= 0.1), np.sum(res_ds2[0] is True))
 
         np.random.seed(31)
         res_bh2 = dsfdr.dsfdr(self.data_sim, self.labels_sim,