Skip to content

Commit

Permalink
test and fix for #59 on genotype arrays
Browse files (browse the repository at this point in the history)
  • Loading branch information
alimanfoo committed Feb 1, 2016
1 parent d007ba4 commit 27d4587
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 299 deletions.
457 changes: 231 additions & 226 deletions allel/opt/model.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion allel/opt/model.pyx
Expand Up @@ -185,7 +185,7 @@ def genotype_int8_count_alleles(cnp.int8_t[:, :, :] g not None,
# iterate over alleles
for k in range(ploidy):
allele = g[i, j, k]
if allele >= 0:
if 0 <= allele <= max_allele:
ac[i, allele] += 1

return np.asarray(ac)
Expand Down
156 changes: 103 additions & 53 deletions allel/test/test_model_api.py
Expand Up @@ -92,7 +92,7 @@

class GenotypeArrayInterface(object):

def setup_instance(self, data):
def setup_instance(self, data, dtype=None):
# to be implemented in sub-classes
pass

Expand Down Expand Up @@ -744,62 +744,112 @@ def test_min(self):

def test_count_alleles(self):

# diploid
g = self.setup_instance(diploid_genotype_data)
expect = np.array([[3, 1, 0],
[1, 2, 1],
[1, 2, 1],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles()
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)

# polyploid
print('test polyploid')
g = self.setup_instance(triploid_genotype_data)
expect = np.array([[5, 1, 0],
[1, 5, 0],
[1, 1, 1],
[0, 0, 0]])
actual = g.count_alleles()
aeq(expect, actual)
eq(4, actual.n_variants)
eq(3, actual.n_alleles)
for dtype in None, 'i1', 'i2':
# make sure we test the optimisations too

# diploid
g = self.setup_instance(diploid_genotype_data, dtype=dtype)
expect = np.array([[3, 1, 0],
[1, 2, 1],
[1, 2, 1],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles()
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)

# polyploid
g = self.setup_instance(triploid_genotype_data, dtype=dtype)
expect = np.array([[5, 1, 0],
[1, 5, 0],
[1, 1, 1],
[0, 0, 0]])
actual = g.count_alleles()
aeq(expect, actual)
eq(4, actual.n_variants)
eq(3, actual.n_alleles)

def test_count_alleles_subpop(self):
g = self.setup_instance(diploid_genotype_data)
expect = np.array([[2, 0, 0],
[1, 0, 1],
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles(subpop=[0, 2])
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)
for dtype in None, 'i1', 'i2':
# make sure we test the optimisations too
g = self.setup_instance(diploid_genotype_data, dtype=dtype)
expect = np.array([[2, 0, 0],
[1, 0, 1],
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles(subpop=[0, 2])
aeq(expect, actual)
eq(5, actual.n_variants)
eq(3, actual.n_alleles)

def test_count_alleles_subpops(self):
g = self.setup_instance(diploid_genotype_data)
subpops = {'sub1': [0, 2], 'sub2': [1, 2]}
expect_sub1 = np.array([[2, 0, 0],
[1, 0, 1],
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
expect_sub2 = np.array([[1, 1, 0],
[0, 2, 0],
[0, 1, 1],
[0, 0, 0],
[0, 0, 0]])
actual = g.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(5, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(5, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)
for dtype in None, 'i1', 'i2':
# make sure we test the optimisations too
g = self.setup_instance(diploid_genotype_data, dtype=dtype)
subpops = {'sub1': [0, 2], 'sub2': [1, 2]}
expect_sub1 = np.array([[2, 0, 0],
[1, 0, 1],
[1, 1, 0],
[0, 0, 2],
[0, 0, 0]])
expect_sub2 = np.array([[1, 1, 0],
[0, 2, 0],
[0, 1, 1],
[0, 0, 0],
[0, 0, 0]])
actual = g.count_alleles_subpops(subpops=subpops)
aeq(expect_sub1, actual['sub1'])
aeq(expect_sub2, actual['sub2'])
eq(5, actual['sub1'].n_variants)
eq(3, actual['sub1'].n_alleles)
eq(5, actual['sub2'].n_variants)
eq(3, actual['sub2'].n_alleles)

def test_count_alleles_max_allele(self):

for dtype in None, 'i1', 'i2':
# make sure we test the optimisations too

# diploid
g = self.setup_instance(diploid_genotype_data, dtype=dtype)
expect = np.array([[3, 1, 0],
[1, 2, 1],
[1, 2, 1],
[0, 0, 2],
[0, 0, 0]])
actual = g.count_alleles()
eq(3, actual.n_alleles)
aeq(expect, actual)
actual = g.count_alleles(max_allele=2)
eq(3, actual.n_alleles)
aeq(expect, actual)
actual = g.count_alleles(max_allele=1)
eq(2, actual.n_alleles)
aeq(expect[:, :2], actual)
actual = g.count_alleles(max_allele=0)
eq(1, actual.n_alleles)
aeq(expect[:, :1], actual)

# polyploid
g = self.setup_instance(triploid_genotype_data, dtype=dtype)
expect = np.array([[5, 1, 0],
[1, 5, 0],
[1, 1, 1],
[0, 0, 0]])
actual = g.count_alleles()
eq(3, actual.n_alleles)
aeq(expect, actual)
actual = g.count_alleles(max_allele=2)
eq(3, actual.n_alleles)
aeq(expect, actual)
actual = g.count_alleles(max_allele=1)
eq(2, actual.n_alleles)
aeq(expect[:, :2], actual)
actual = g.count_alleles(max_allele=0)
eq(1, actual.n_alleles)
aeq(expect[:, :1], actual)

def test_map_alleles(self):
a = np.array(diploid_genotype_data, dtype=np.int8)
Expand Down
4 changes: 2 additions & 2 deletions allel/test/test_model_bcolz.py
Expand Up @@ -29,8 +29,8 @@ class GenotypeCArrayTests(GenotypeArrayInterface, unittest.TestCase):

_class = GenotypeCArray

def setup_instance(self, data):
return GenotypeCArray(data)
def setup_instance(self, data, dtype=None):
return GenotypeCArray(data, dtype=dtype)

def test_constructor(self):

Expand Down
29 changes: 16 additions & 13 deletions allel/test/test_model_chunked.py
Expand Up @@ -30,8 +30,9 @@ class GenotypeChunkedArrayTests(GenotypeArrayInterface, unittest.TestCase):
def setUp(self):
chunked.storage_registry['default'] = chunked.bcolzmem_storage

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data, chunklen=2)
def setup_instance(self, data, dtype=None):
data = chunked.storage_registry['default'].array(data, dtype=dtype,
chunklen=2)
return GenotypeChunkedArray(data)

def test_constructor(self):
Expand Down Expand Up @@ -129,8 +130,8 @@ class GenotypeChunkedArrayTestsBColzTmpStorage(GenotypeChunkedArrayTests):
def setUp(self):
chunked.storage_registry['default'] = chunked.bcolztmp_storage

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data, chunklen=2)
def setup_instance(self, data, dtype=None):
data = chunked.bcolztmp_storage.array(data, dtype=dtype, chunklen=2)
return GenotypeChunkedArray(data)

def test_storage(self):
Expand All @@ -143,10 +144,12 @@ class GenotypeChunkedArrayTestsBColzCustomStorage(GenotypeChunkedArrayTests):

def setUp(self):
chunked.storage_registry['default'] = chunked.BcolzMemStorage(
cparams=bcolz.cparams(cname='zlib', clevel=1))
cparams=bcolz.cparams(cname='zlib', clevel=1)
)

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data, chunklen=2)
def setup_instance(self, data, dtype=None):
data = chunked.storage_registry['default'].array(data, dtype=dtype,
chunklen=2)
return GenotypeChunkedArray(data)

def test_storage(self):
Expand All @@ -161,8 +164,8 @@ class GenotypeChunkedArrayTestsHDF5MemStorage(GenotypeChunkedArrayTests):
def setUp(self):
chunked.storage_registry['default'] = chunked.hdf5mem_storage

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data)
def setup_instance(self, data, dtype=None):
data = chunked.hdf5mem_storage.array(data, dtype=dtype)
return GenotypeChunkedArray(data)

def test_storage(self):
Expand All @@ -175,8 +178,8 @@ class GenotypeChunkedArrayTestsHDF5TmpStorage(GenotypeChunkedArrayTests):
def setUp(self):
chunked.storage_registry['default'] = chunked.hdf5tmp_storage

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data)
def setup_instance(self, data, dtype=None):
data = chunked.hdf5tmp_storage.array(data, dtype=dtype)
return GenotypeChunkedArray(data)

def test_storage(self):
Expand All @@ -189,8 +192,8 @@ class GenotypeChunkedArrayTestsHDF5TmpLZFStorage(GenotypeChunkedArrayTests):
def setUp(self):
chunked.storage_registry['default'] = chunked.hdf5tmp_lzf_storage

def setup_instance(self, data):
data = chunked.storage_registry['default'].array(data)
def setup_instance(self, data, dtype=None):
data = chunked.hdf5tmp_lzf_storage.array(data, dtype=dtype)
return GenotypeChunkedArray(data)

def test_storage(self):
Expand Down
3 changes: 2 additions & 1 deletion allel/test/test_model_dask.py
Expand Up @@ -28,7 +28,8 @@ class GenotypeDaskArrayTests(GenotypeArrayInterface, unittest.TestCase):

_class = GenotypeDaskArray

def setup_instance(self, data):
def setup_instance(self, data, dtype=None):
# ignore dtype
return GenotypeDaskArray.from_array(data, chunks=(2, 2, None))

def test_constructor(self):
Expand Down
4 changes: 2 additions & 2 deletions allel/test/test_model_ndarray.py
Expand Up @@ -25,8 +25,8 @@ class GenotypeArrayTests(GenotypeArrayInterface, unittest.TestCase):

_class = GenotypeArray

def setup_instance(self, data):
return GenotypeArray(data)
def setup_instance(self, data, dtype=None):
return GenotypeArray(data, dtype=dtype)

def test_constructor(self):

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -10,4 +10,4 @@ nose
petl
seaborn
toolz
git+https://github.com/blaze/dask.git@master
dask

0 comments on commit 27d4587

Please sign in to comment.