Skip to content

Commit

Permalink
Resolves problems when count_alleles() is given subpop as numpy array (
Browse files Browse the repository at this point in the history
…#235)

* test and fix ambiguous subpop test

* release notes
  • Loading branch information
alimanfoo committed Dec 20, 2018
1 parent df657b4 commit 7fe1712
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 47 deletions.
12 changes: 8 additions & 4 deletions allel/model/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
import dask.array as da


from allel.util import check_shape, check_dtype, check_ndim, check_integer_dtype
from allel.util import check_shape, check_dtype, check_ndim, check_integer_dtype, \
asarray_ndim
from allel.abc import ArrayWrapper, DisplayAs2D, DisplayAs1D
from allel.compat import copy_method_doc
from .ndarray import GenotypeArray, HaplotypeArray, AlleleCountsArray, GenotypeVector, \
Expand Down Expand Up @@ -364,7 +365,8 @@ def count_alleles(self, max_allele=None, subpop=None):
max_allele = self.max().compute()[()]

# deal with subpop
if subpop:
subpop = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)
if subpop is not None:
gd = self.take(subpop, axis=1).values
else:
gd = self.values
Expand Down Expand Up @@ -585,7 +587,8 @@ def count_alleles(self, max_allele=None, subpop=None):
max_allele = self.max().compute()[()]

# deal with subpop
if subpop:
subpop = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)
if subpop is not None:
hd = self.take(subpop, axis=1).values
else:
hd = self.values
Expand Down Expand Up @@ -966,7 +969,8 @@ def _method_drop_ploidy(self, method_name, **kwargs):
def count_alleles(self, subpop=None):

# deal with subpop
if subpop:
subpop = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)
if subpop is not None:
gd = self.take(subpop, axis=1).values
else:
gd = self.values
Expand Down
30 changes: 15 additions & 15 deletions allel/model/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,17 @@ def to_str(self, threshold=10, edgeitems=5):
copy_method_doc(GenotypeVector.to_haplotypes, Genotypes.to_haplotypes)


def _normalize_subpop_arg(subpop, n):
    """Validate and normalize a `subpop` argument holding column indices.

    Parameters
    ----------
    subpop : sequence of ints, array-like or None
        Indices selecting a sub-population. Passed through
        ``asarray_ndim`` requesting a 1-dimensional ``np.int64`` array,
        with None permitted.
    n : int
        Exclusive upper bound for valid indices (callers pass the size of
        the axis being indexed).

    Returns
    -------
    subpop
        None if no sub-population was given, otherwise the validated
        indices after being passed through ``memoryview_safe``.

    Raises
    ------
    ValueError
        If any index is greater than or equal to `n`, or if any index is
        negative.

    """
    indices = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)

    # nothing to validate when no sub-population was requested
    if indices is None:
        return None

    # upper bound is checked first, then negative indices
    if np.any(indices >= n):
        raise ValueError('index out of bounds')
    if np.any(indices < 0):
        raise ValueError('negative indices not supported')

    return memoryview_safe(indices)


class GenotypeArray(Genotypes, DisplayAs2D):
"""Array of discrete genotype calls for a matrix of variants and samples.
Expand Down Expand Up @@ -1820,13 +1831,7 @@ def count_alleles(self, max_allele=None, subpop=None):
"""

# check inputs
subpop = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)
if subpop is not None:
if np.any(subpop >= self.shape[1]):
raise ValueError('index out of bounds')
if np.any(subpop < 0):
raise ValueError('negative indices not supported')
subpop = memoryview_safe(subpop)
subpop = _normalize_subpop_arg(subpop, self.shape[1])

# determine alleles to count
if max_allele is None:
Expand Down Expand Up @@ -2381,13 +2386,7 @@ def count_alleles(self, max_allele=None, subpop=None):
"""

# check inputs
subpop = asarray_ndim(subpop, 1, allow_none=True, dtype=np.int64)
if subpop is not None:
if np.any(subpop >= self.shape[1]):
raise ValueError('index out of bounds')
if np.any(subpop < 0):
raise ValueError('negative indices not supported')
subpop = memoryview_safe(subpop)
subpop = _normalize_subpop_arg(subpop, self.shape[1])

# determine alleles to count
if max_allele is None:
Expand Down Expand Up @@ -3314,7 +3313,8 @@ def n_alleles(self):
def count_alleles(self, subpop=None):

# deal with subpop
if subpop:
subpop = _normalize_subpop_arg(subpop, self.shape[1])
if subpop is not None:
g = self.take(subpop, axis=1).values
else:
g = self.values
Expand Down
74 changes: 46 additions & 28 deletions allel/test/model/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,16 +803,20 @@ def test_count_alleles_subpop(self):
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles(subpop=[0, 2])
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)
# test different types of subpop argument
for subpop in ([0, 2], (0, 2),
np.array([0, 2], dtype='i4'),
np.array([0, 2], dtype='i8'),
np.array([0, 2], dtype='u2')):
actual = g.count_alleles(subpop=subpop)
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)

def test_count_alleles_subpops(self):
for dtype in None, 'i1', 'i2', 'i4', 'i8':
# make sure we test the optimisations too
g = self.setup_instance(diploid_genotype_data, dtype=dtype)
subpops = {'sub1': [0, 2], 'sub2': [1, 2]}
expect_sub1 = np.array([[2, 0, 0],
[1, 0, 1],
[1, 1, 0],
Expand All @@ -823,13 +827,16 @@ def test_count_alleles_subpops(self):
[0, 1, 1],
[0, 0, 0],
[0, 0, 0]])
actual = g.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(5, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(5, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)
# test different types of subpop argument
for subpops in ({'sub1': [0, 2], 'sub2': [1, 2]},
{'sub1': np.array([0, 2]), 'sub2': np.array([1, 2])}):
actual = g.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(5, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(5, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)

def test_count_alleles_max_allele(self):

Expand Down Expand Up @@ -1271,10 +1278,15 @@ def test_count_alleles_subpop(self):
[0, 0, 0]])
for dtype in None, 'i1', 'i2', 'i4', 'i8':
h = self.setup_instance(haplotype_data, dtype=dtype)
actual = h.count_alleles(subpop=[0, 2])
aeq(expect, actual)
eq(4, actual.n_variants)
eq(3, actual.n_alleles)
# test different types of subpop argument
for subpop in ([0, 2], (0, 2),
np.array([0, 2], dtype='i4'),
np.array([0, 2], dtype='i8'),
np.array([0, 2], dtype='u2')):
actual = h.count_alleles(subpop=subpop)
aeq(expect, actual)
eq(4, actual.n_variants)
eq(3, actual.n_alleles)

def test_count_alleles_subpops(self):
expect_sub1 = np.array([[1, 0, 0],
Expand All @@ -1287,14 +1299,15 @@ def test_count_alleles_subpops(self):
[0, 0, 0]])
for dtype in None, 'i1', 'i2', 'i4', 'i8':
h = self.setup_instance(haplotype_data, dtype=dtype)
subpops = {'sub1': [0, 2], 'sub2': [1, 2]}
actual = h.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(4, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(4, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)
for subpops in ({'sub1': [0, 2], 'sub2': [1, 2]},
{'sub1': np.array([0, 2]), 'sub2': np.array([1, 2])}):
actual = h.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(4, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(4, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)

def test_count_alleles_max_allele(self):
expect = np.array([[1, 1, 0],
Expand Down Expand Up @@ -1920,10 +1933,15 @@ def test_count_alleles_subpop(self):
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles(subpop=[0, 2])
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)
# test different types of subpop argument
for subpop in ([0, 2], (0, 2),
np.array([0, 2], dtype='i4'),
np.array([0, 2], dtype='i8'),
np.array([0, 2], dtype='u2')):
actual = g.count_alleles(subpop=subpop)
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)


class SortedIndexInterface(object):
Expand Down
4 changes: 4 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ v1.2.0 (work in progress)
* Fixed incorrect fill value in GFF parsing functions. By
:user:`Alistair Miles <alimanfoo>`, :issue:`165`, :issue:`223`.

* Fixed a problem in `count_alleles()` methods that occurred when the
`subpop` arg was provided as a numpy array. By :user:`Alistair Miles
<alimanfoo>`, :issue:`235`, :issue:`171`.

* Fixed pandas deprecation warning. By :user:`Summer Rae <summerela>`,
:issue:`228`.

Expand Down

0 comments on commit 7fe1712

Please sign in to comment.