Merge tag 'v0.16.2'

v0.16.2
cggh · Sep 24, 2015 · d26a528 · d26a528
2 parents 9f1a32d + 8651029
commit d26a528
Show file tree

Hide file tree

Showing 7 changed files with 95 additions and 11 deletions.
diff --git a/allel/__init__.py b/allel/__init__.py
@@ -2,7 +2,7 @@
 # flake8: noqa
 
 
-__version__ = '0.16.1'
+__version__ = '0.16.2'
 
 
 import allel.model as model

diff --git a/allel/model/bcolz.py b/allel/model/bcolz.py
@@ -243,20 +243,37 @@ def carray_block_compress(carr, condition, axis, blen=None, **kwargs):
         raise NotImplementedError('axis not supported: %s' % axis)
 
 
-def carray_block_take(carr, indices, axis, **kwargs):
+def carray_block_take(carr, indices, axis, blen=None, **kwargs):
+    if blen is None:
+        blen = carr.chunklen
 
     # check inputs
     indices = asarray_ndim(indices, 1)
 
     if axis == 0:
+        # check if indices are ordered
+        if np.any(indices[1:] <= indices[:-1]):
+            raise ValueError('indices must be strictly increasing')
         condition = np.zeros((carr.shape[0],), dtype=bool)
         condition[indices] = True
-        return carray_block_compress(carr, condition, axis=0, **kwargs)
+        return carray_block_compress(carr, condition, axis=0,
+                                     blen=blen, **kwargs)
 
     elif axis == 1:
-        condition = np.zeros((carr.shape[1],), dtype=bool)
-        condition[indices] = True
-        return carray_block_compress(carr, condition, axis=1, **kwargs)
+
+        # setup output
+        kwargs.setdefault('dtype', carr.dtype)
+        kwargs.setdefault('cparams', getattr(carr, 'cparams', None))
+        kwargs.setdefault('expectedlen', carr.shape[0])
+        out = bcolz.zeros((0, len(indices)) + carr.shape[2:],
+                          **kwargs)
+
+        # build output
+        for i in range(0, carr.shape[0], blen):
+            block = carr[i:i+blen]
+            out.append(np.take(block, indices, axis=1))
+
+        return out
 
     else:
         raise NotImplementedError('axis not supported: %s' % axis)
@@ -296,6 +313,9 @@ def ctable_block_compress(ctbl, condition, blen=None, **kwargs):
 
 def ctable_block_take(ctbl, indices, **kwargs):
     indices = asarray_ndim(indices, 1)
+    # check if indices are ordered
+    if np.any(indices[1:] <= indices[:-1]):
+        raise ValueError('indices must be strictly increasing')
     condition = np.zeros((ctbl.shape[0],), dtype=bool)
     condition[indices] = True
     return ctable_block_compress(ctbl, condition, **kwargs)

diff --git a/allel/stats/preprocessing.py b/allel/stats/preprocessing.py
@@ -5,19 +5,24 @@
 import numpy as np
 
 
+from allel.compat import text_type
 from allel.util import asarray_ndim
 
 
 def get_scaler(scaler, copy, ploidy):
+    # normalise strings to lower case
+    if isinstance(scaler, text_type):
+        scaler = scaler.lower()
     if scaler == 'patterson':
-        scaler = PattersonScaler(copy=copy, ploidy=ploidy)
+        return PattersonScaler(copy=copy, ploidy=ploidy)
     elif scaler == 'standard':
-        scaler = StandardScaler(copy=copy)
+        return StandardScaler(copy=copy)
     elif hasattr(scaler, 'fit'):
-        pass
+        return scaler
+    elif scaler in ['center', 'centre'] or scaler is None:
+        return CenterScaler(copy=copy)
     else:
-        scaler = CenterScaler(copy=copy)
-    return scaler
+        raise ValueError('unrecognised scaler: %s' % scaler)
 
 
 class StandardScaler(object):

diff --git a/allel/test/test_model_api.py b/allel/test/test_model_api.py
@@ -198,6 +198,15 @@ def test_take(self):
         expect = np.array(diploid_genotype_data).take(indices, axis=1)
         aeq(expect, t)
 
+        # take samples not in original order
+        indices = [2, 0]
+        t = g.take(indices, axis=1)
+        eq(g.n_variants, t.n_variants)
+        eq(2, t.n_samples)
+        eq(g.ploidy, t.ploidy)
+        expect = np.array(diploid_genotype_data).take(indices, axis=1)
+        aeq(expect, t)
+
     def test_compress(self):
         # Test the compress() method.
 

diff --git a/allel/test/test_model_bcolz.py b/allel/test/test_model_bcolz.py
@@ -190,6 +190,14 @@ def test_to_hdf5(self):
             h5d = h5f[node_path]
             aeq(g[:], h5d[:])
 
+    def test_take(self):
+        g = self.setup_instance(diploid_genotype_data)
+        # take variants not in original order
+        # not supported for carrays
+        indices = [2, 0]
+        with assert_raises(ValueError):
+            t = g.take(indices, axis=0)
+
 
 class HaplotypeCArrayTests(HaplotypeArrayInterface, unittest.TestCase):
 
@@ -463,6 +471,15 @@ def test_to_hdf5_group(self):
             for n in a.dtype.names:
                 aeq(a[n], h5g[n][:])
 
+    def test_take(self):
+        a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
+        vt = VariantCTable(a)
+        # take variants not in original order
+        # not supported for ctables
+        indices = [2, 0]
+        with assert_raises(ValueError):
+            t = vt.take(indices)
+
 
 class FeatureCTableTests(FeatureTableInterface, unittest.TestCase):
 

diff --git a/allel/test/test_model_ndarray.py b/allel/test/test_model_ndarray.py
@@ -166,6 +166,17 @@ def test_haploidify_samples(self):
             for j in range(g.n_samples):
                 self.assertIn(h[i, j], set(g[i, j]))
 
+    def test_take(self):
+        g = self.setup_instance(diploid_genotype_data)
+        # take variants not in original order
+        indices = [2, 0]
+        t = g.take(indices, axis=0)
+        eq(2, t.n_variants)
+        eq(g.n_samples, t.n_samples)
+        eq(g.ploidy, t.ploidy)
+        expect = np.array(diploid_genotype_data).take(indices, axis=0)
+        aeq(expect, t)
+
 
 class HaplotypeArrayTests(HaplotypeArrayInterface, unittest.TestCase):
 
@@ -617,6 +628,17 @@ def test_view(self):
         eq(5, vt.n_variants)
         eq(variant_table_names, vt.names)
 
+    def test_take(self):
+        a = np.rec.array(variant_table_data,
+                         dtype=variant_table_dtype)
+        vt = VariantTable(a)
+        # take variants not in original order
+        indices = [2, 0]
+        t = vt.take(indices)
+        eq(2, t.n_variants)
+        expect = a.take(indices)
+        aeq(expect, t)
+
 
 class FeatureTableTests(FeatureTableInterface, unittest.TestCase):
 

diff --git a/docs/release.rst b/docs/release.rst
@@ -1,6 +1,17 @@
 Release notes
 =============
 
+v0.16.2
+-------
+
+* Fixed behaviour of take() method on compressed arrays when indices are not
+  in increasing order: axis 1 keeps the given order, axis 0 raises ValueError
+  (`#6 <https://github.com/cggh/scikit-allel/issues/6>`_).
+* Minor change to scaler argument to PCA functions in
+  :mod:`allel.stats.decomposition` to avoid confusion about when to fall
+  back to default scaler
+  (`#7 <https://github.com/cggh/scikit-allel/issues/7>`_).
+
 v0.16.1
 -------