Skip to content

Commit

Permalink
fix the errors in appveyor
Browse files Browse the repository at this point in the history
[publish doc]
  • Loading branch information
RNAer committed Apr 19, 2018
1 parent 58a35e7 commit ed19472
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 23 deletions.
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# This does NOT work yet because you can't install biom-format and skbio via conda

environment:
matrix:
# For Python versions available on Appveyor, see
Expand Down
6 changes: 3 additions & 3 deletions calour/amplicon_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ def find_lowest_taxonomy(self, field='taxonomy', new_field='taxa'):
'''
def find_highest(s):
l = s.split(';')
b = [len(i) > 3 for i in l]
return np.array(l)[b][-1]
levels = s.split(';')
b = [len(i) > 3 for i in levels]
return np.array(levels)[b][-1]
self.feature_metadata[new_field] = self.feature_metadata[field].apply(find_highest)
return self
1 change: 0 additions & 1 deletion calour/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ def _compute_frac_nonzero(data, steps, cutoff=2, frac=0.9, random_state=None):
raise ValueError('You need to provide a value among (0, 1] for `frac`: %r' % frac)
for n, i in enumerate(steps):
data = data[rand.choice(n_samples, i, replace=False), :]
print(data)
x = data >= cutoff
# the count of samples that have the given feature
counts = x.sum(axis=0)
Expand Down
11 changes: 5 additions & 6 deletions calour/tests/test_transforming.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

from unittest import main
import sys
from unittest import main, skipIf

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -138,15 +139,13 @@ def test_normalize_by_subset_features(self):
assert_array_almost_equal(newexp.data[:, good_features].sum(axis=1), np.ones([exp.data.shape[0]])*10000)
self.assertTrue(np.all(newexp.data[:, bad_features] > exp.data[:, bad_features]))

@skipIf(sys.platform.startswith("win"), "skip this test for Windows")
def test_subsample_count(self):
exp = ca.Experiment(data=np.array([[1, 2, 3], [4, 5, 6]]),
sample_metadata=pd.DataFrame([['a', 'b', 'c'], ['d', 'e', 'f']]),
sparse=False)
n = 6
obs = exp.subsample_count(n)
print(obs.data.shape)
print(obs.data.sum(axis=0))
print(obs.data.sum(axis=1))
n = 5
obs = exp.subsample_count(n, random_state=9)
assert_array_equal(obs.data.sum(axis=1), np.array([n, n]))
self.assertTrue(np.all(obs.data <= n))

Expand Down
20 changes: 14 additions & 6 deletions calour/transforming.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def center_log(exp: Experiment, method=lambda matrix: matrix + 1, centralize=Fal


@Experiment._record_sig
def subsample_count(exp: Experiment, total, replace=False, inplace=False):
def subsample_count(exp: Experiment, total, replace=False, inplace=False, random_state=None):
"""Randomly subsample each sample to the same number of counts.
.. warning:: This function will change the :attr:`Experiment.data`
Expand All @@ -362,6 +362,12 @@ def subsample_count(exp: Experiment, total, replace=False, inplace=False):
should be discrete counts. The samples that have fewer total counts
than ``total`` will be dropped.
.. note:: This function may not work on Windows. It relies on
:func:`skbio.stats.subsample_counts`, which raises
`ValueError: Buffer dtype mismatch, expected 'int64_t' but got
'long'` in the `_subsample_counts_without_replacement` function of
`skbio/stats/__subsample.pyx`.
Parameters
----------
total : int, optional
Expand Down Expand Up @@ -391,13 +397,15 @@ def subsample_count(exp: Experiment, total, replace=False, inplace=False):
# check if it is normalized: if so, raise error
if exp.exp_metadata.get('normalized'):
raise ValueError('Your `Experiment` object is normalized: subsample operates on integer raw data, not on normalized data.')
newexp.data = newexp.data.astype(int)

drops = []
np.random.seed(random_state)
for row in range(newexp.data.shape[0]):
try:
newexp.data[row, :] = subsample_counts(newexp.data[row, :], n=total, replace=replace)
except ValueError:
# if the row sum is smaller than total and replace is False, this row should be dropped
counts = newexp.data[row, :]
if total > counts.sum() and not replace:
drops.append(row)
else:
newexp.data[row, :] = subsample_counts(counts, n=total, replace=replace)

newexp.reorder([i not in drops for i in range(newexp.data.shape[0])], inplace=True)
return newexp
10 changes: 5 additions & 5 deletions calour/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ def compute_prevalence(abundance):
Examples
--------
>>> abund = [0, 0, 1, 2, 4, 1]
>>> abund = [0, 0, 1, 2, 4]
>>> x, y = compute_prevalence(abund)
>>> all(x == np.array([0, 1, 2, 4]))
True
>>> all(y == np.array([0.66666667, 0.33333333, 0.16666667, 0.]))
True
>>> x #doctest: +SKIP
array([0, 1, 2, 4])
>>> y #doctest: +SKIP
array([0.6, 0.4, 0.2, 0.])
'''
# unique values are sorted
cutoffs, counts = np.unique(abundance, return_counts=True)
Expand Down
1 change: 1 addition & 0 deletions ci/conda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ scipy
pyqt>=5
pandas
matplotlib
h5py
scikit-learn
statsmodels
ipywidgets
Expand Down

0 comments on commit ed19472

Please sign in to comment.