Skip to content

Commit

Permalink
fix dtypes (#242)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alimanfoo committed Dec 21, 2018
1 parent 3bff7d7 commit 8fcf264
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 26 deletions.
15 changes: 7 additions & 8 deletions allel/model/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def f(block):
return gb.count_alleles(max_allele=max_allele)[:, None, :]

# map blocks and reduce
out = da.map_blocks(f, gd, chunks=chunks, dtype='i4').sum(axis=1)
out = da.map_blocks(f, gd, chunks=chunks).sum(axis=1, dtype='i4')

else:

Expand All @@ -393,7 +393,7 @@ def f(block, bmask):
return g.count_alleles(max_allele=max_allele)[:, None, :]

md = self.mask[:, :, None]
out = da.map_blocks(f, gd, md, chunks=chunks, dtype='i4').sum(axis=1)
out = da.map_blocks(f, gd, md, chunks=chunks).sum(axis=1, dtype='i4')

return AlleleCountsDaskArray(out)

Expand Down Expand Up @@ -430,7 +430,7 @@ def f(block, bmapping):

# map blocks
out = da.map_blocks(f, self.values, mapping[:, None, :], chunks=self.chunks,
dtype=mapping.dtype)
dtype=self.dtype)
return type(self)(out)

def to_allele_counts(self, max_allele=None):
Expand Down Expand Up @@ -602,8 +602,7 @@ def f(block):
return h.count_alleles(max_allele=max_allele)[:, None, :]

# map blocks and reduce
# TODO need to figure out dtype?
out = hd.map_blocks(f, chunks=chunks, new_axis=2).sum(axis=1)
out = hd.map_blocks(f, chunks=chunks, new_axis=2).sum(axis=1, dtype='i4')
return AlleleCountsDaskArray(out)

def count_alleles_subpops(self, subpops, max_allele=None):
Expand All @@ -625,7 +624,7 @@ def f(block, bmapping):
mapping = da.from_array(mapping, chunks=(self.chunks[0], None))

# map blocks
out = da.map_blocks(f, self.values, mapping, chunks=self.chunks, dtype=mapping.dtype)
out = da.map_blocks(f, self.values, mapping, chunks=self.chunks, dtype=self.dtype)
return HaplotypeDaskArray(out)

def compress(self, condition, axis=0, out=None):
Expand Down Expand Up @@ -784,7 +783,7 @@ def f(block, bmapping):
return ac.map_alleles(bmapping, max_allele=max_allele)

# map blocks
out = da.map_blocks(f, self.values, mapping, dtype=mapping.dtype)
out = da.map_blocks(f, self.values, mapping, dtype=self.dtype)
return AlleleCountsDaskArray(out)

def compress(self, condition, axis=0, out=None):
Expand Down Expand Up @@ -979,7 +978,7 @@ def count_alleles(self, subpop=None):
else:
gd = self.values

out = gd.sum(axis=1)
out = gd.sum(axis=1, dtype='i4')
return AlleleCountsDaskArray(out)

def compress(self, condition, axis=0, out=None):
Expand Down
3 changes: 2 additions & 1 deletion allel/model/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3322,7 +3322,8 @@ def count_alleles(self, subpop=None):
else:
g = self.values

out = g.sum(axis=1)
out = np.empty((g.shape[0], g.shape[2]), dtype='i4')
g.sum(axis=1, out=out)
out = AlleleCountsArray(out)
return out

Expand Down
52 changes: 35 additions & 17 deletions allel/test/model/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ def test_count_alleles(self):
aeq(expect, actual)
assert 5 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

# polyploid
g = self.setup_instance(triploid_genotype_data, dtype=dtype)
Expand All @@ -791,6 +792,7 @@ def test_count_alleles(self):
aeq(expect, actual)
assert 4 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

def test_count_alleles_subpop(self):
for dtype in None, 'i1', 'i2', 'i4', 'i8':
Expand All @@ -810,6 +812,7 @@ def test_count_alleles_subpop(self):
aeq(expect, actual)
assert 5 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

def test_count_alleles_subpops(self):
for dtype in None, 'i1', 'i2', 'i4', 'i8':
Expand All @@ -833,8 +836,10 @@ def test_count_alleles_subpops(self):
aeq(expect_sub2, actual['sub2'])
assert 5 == actual['sub1'].n_variants
assert 3 == actual['sub1'].n_alleles
assert np.dtype('i4') == actual['sub1'].dtype
assert 5 == actual['sub2'].n_variants
assert 3 == actual['sub2'].n_alleles
assert np.dtype('i4') == actual['sub2'].dtype

def test_count_alleles_max_allele(self):

Expand Down Expand Up @@ -881,7 +886,7 @@ def test_count_alleles_max_allele(self):
aeq(expect[:, :1], actual)

def test_map_alleles(self):
for dtype in None, 'i1', 'i2', 'i4', 'i8':
for dtype in 'i1', 'i2', 'i4', 'i8':
a = np.array(diploid_genotype_data, dtype=dtype)
g = self.setup_instance(a)
mapping = np.array([[0, 1, 2],
Expand All @@ -896,6 +901,8 @@ def test_map_alleles(self):
[[-1, -1], [-1, -1], [-1, -1]]]
actual = g.map_alleles(mapping)
aeq(expect, actual)
# match input dtype
assert dtype == actual.dtype

def test_set_mask(self):

Expand Down Expand Up @@ -1268,6 +1275,7 @@ def test_count_alleles(self):
aeq(expect, actual)
assert 4 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

def test_count_alleles_subpop(self):
expect = np.array([[1, 0, 0],
Expand All @@ -1285,6 +1293,7 @@ def test_count_alleles_subpop(self):
aeq(expect, actual)
assert 4 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

def test_count_alleles_subpops(self):
expect_sub1 = np.array([[1, 0, 0],
Expand All @@ -1304,8 +1313,10 @@ def test_count_alleles_subpops(self):
aeq(expect_sub2, actual['sub2'])
assert 4 == actual['sub1'].n_variants
assert 3 == actual['sub1'].n_alleles
assert np.dtype('i4') == actual['sub1'].dtype
assert 4 == actual['sub2'].n_variants
assert 3 == actual['sub2'].n_alleles
assert np.dtype('i4') == actual['sub2'].dtype

def test_count_alleles_max_allele(self):
expect = np.array([[1, 1, 0],
Expand Down Expand Up @@ -1342,12 +1353,13 @@ def test_map_alleles(self):
actual = h.map_alleles(mapping)
aeq(expect, actual)

for dtype in None, 'i1', 'i2', 'i4', 'i8':
for dtype in 'i1', 'i2', 'i4', 'i8':
a = np.array(haplotype_data, dtype=dtype)
h = self.setup_instance(a)
mapping = np.array(mapping, dtype=dtype)
actual = h.map_alleles(mapping)
aeq(expect, actual)
assert dtype == actual.dtype

def test_concatenate(self):
a = np.array(haplotype_data, dtype=np.int8)
Expand Down Expand Up @@ -1536,21 +1548,24 @@ def test_is_biallelic_01(self):
aeq(expect, actual)

def test_map_alleles(self):
ac = self.setup_instance(allele_counts_data)
mapping = np.array([[0, 1, 2],
[2, 0, 1],
[1, 2, 0],
[-1, 1, 0],
[2, 0, 1],
[0, 2, 1]])
expect = [[3, 1, 0],
[2, 1, 1],
[1, 1, 2],
[2, 0, 0],
[0, 0, 0],
[0, 2, 1]]
actual = ac.map_alleles(mapping)
aeq(expect, actual)
for dtype in 'i2', 'i4', 'i8':
ac = self.setup_instance(np.array(allele_counts_data, dtype=dtype))
mapping = np.array([[0, 1, 2],
[2, 0, 1],
[1, 2, 0],
[-1, 1, 0],
[2, 0, 1],
[0, 2, 1]])
expect = [[3, 1, 0],
[2, 1, 1],
[1, 1, 2],
[2, 0, 0],
[0, 0, 0],
[0, 2, 1]]
actual = ac.map_alleles(mapping)
aeq(expect, actual)
# match dtype of input
assert dtype == actual.dtype

# another test based on https://github.com/cggh/scikit-allel/issues/200
ac = self.setup_instance([[10, 20, 30, 40],
Expand Down Expand Up @@ -1936,6 +1951,7 @@ def test_count_alleles(self):
aeq(expect, actual)
assert 5 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

# polyploid
g = self.setup_instance(triploid_genotype_ac_data)
Expand All @@ -1947,6 +1963,7 @@ def test_count_alleles(self):
aeq(expect, actual)
assert 4 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype

def test_count_alleles_subpop(self):
g = self.setup_instance(diploid_genotype_ac_data)
Expand All @@ -1964,6 +1981,7 @@ def test_count_alleles_subpop(self):
aeq(expect, actual)
assert 5 == actual.n_variants
assert 3 == actual.n_alleles
assert np.dtype('i4') == actual.dtype


class SortedIndexInterface(object):
Expand Down

0 comments on commit 8fcf264

Please sign in to comment.