Skip to content

Commit

Permalink
Drop Python 3.6 in CI; fix h5py and zarr compat issues (#354)
Browse files Browse the repository at this point in the history
* advance python

* fix tests for h5py change

* recythonize

* pull back from py39

* be explicit to make pycharm happy

* add missing requirements

* fix doctests and resolve h5py compat issue

* add test to confirm fix for #353
  • Loading branch information
alimanfoo committed Mar 9, 2021
1 parent 37cd3ce commit 56e070c
Show file tree
Hide file tree
Showing 13 changed files with 7,794 additions and 9,692 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ sudo: true
dist: xenial

python:
- "3.6"
- "3.7"
- "3.8"

Expand Down
28 changes: 26 additions & 2 deletions allel/chunked/storage_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import zarr
import zarr.util
import numcodecs


from allel.chunked import util as _util
Expand Down Expand Up @@ -53,6 +54,18 @@ def array(self, data, expectedlen=None, **kwargs):
# determine chunks
kwargs.setdefault('chunks', default_chunks(data, expectedlen))

# determine object codec
if data.dtype == object:
# peek at first value
peek = data[0]
if isinstance(peek, bytes):
object_codec = numcodecs.VLenBytes()
elif isinstance(peek, str):
object_codec = numcodecs.VLenUTF8()
else:
object_codec = numcodecs.MsgPack()
kwargs.setdefault('object_codec', object_codec)

# create
z = zarr.array(data, **kwargs)

Expand All @@ -63,14 +76,25 @@ def table(self, data, names=None, expectedlen=None, **kwargs):
# setup
names, columns = _util.check_table_like(data, names=names)
kwargs = self._set_defaults(kwargs)
chunks = kwargs.pop('chunks', None)
g = zarr.group(**kwargs)

# create columns
chunks = kwargs.get('chunks', None)
for n, c in zip(names, columns):
if chunks is None:
chunks = default_chunks(c, expectedlen)
g.array(name=n, data=c, chunks=chunks)
if c.dtype == object:
# peek at first value
peek = c[0]
if isinstance(peek, bytes):
object_codec = numcodecs.VLenBytes()
elif isinstance(peek, str):
object_codec = numcodecs.VLenUTF8()
else:
object_codec = numcodecs.MsgPack()
else:
object_codec = None
g.array(name=n, data=c, chunks=chunks, object_codec=object_codec)

# create table
ztbl = ZarrTable(g, names=names)
Expand Down
1 change: 1 addition & 0 deletions allel/chunked/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def check_table_like(data, names=None):

columns = [ensure_array_like(c) for c in columns]
check_equal_length(*columns)

return names, columns


Expand Down
22 changes: 11 additions & 11 deletions allel/model/chunked.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ class VariantChunkedTable(ChunkedTableWrapper):
... h5g.create_dataset('QD', data=qd, chunks=True)
... h5g.create_dataset('AC', data=ac, chunks=True)
...
<HDF5 dataset "CHROM": shape (5,), type "|S4">
<HDF5 dataset "CHROM": shape (5,), type "|O">
<HDF5 dataset "POS": shape (5,), type "<i8">
<HDF5 dataset "DP": shape (5,), type "<i8">
<HDF5 dataset "QD": shape (5,), type "<f8">
Expand All @@ -761,7 +761,7 @@ class VariantChunkedTable(ChunkedTableWrapper):
>>> vt = allel.VariantChunkedTable(callset['/3L/variants'],
... names=['CHROM', 'POS', 'AC', 'QD', 'DP'])
>>> vt
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'S4'), ('POS', '<i8'), ('AC', ...
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'O'), ('POS', '<i8'), ('AC', ...
...
Obtain a single row::
Expand All @@ -772,33 +772,33 @@ class VariantChunkedTable(ChunkedTableWrapper):
Obtain a numpy array by slicing::
>>> vt[:] # doctest: +ELLIPSIS
<VariantTable shape=(5,) dtype=(numpy.record, [('CHROM', 'S4'), ('POS', '<i8'), ...
<VariantTable shape=(5,) dtype=(numpy.record, [('CHROM', 'O'), ('POS', '<i8'), ...
[(b'chr1', 2, [ 1, 2], 4.5, 35) (b'chr1', 7, [ 3, 4], 6.7, 12)
(b'chr2', 3, [ 5, 6], 1.2, 78) (b'chr2', 9, [ 7, 8], 4.4, 22)
(b'chr3', 6, [ 9, 10], 2.8, 99)]
Access a subset of columns::
>>> vt[['CHROM', 'POS']]
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'S4'), ('POS', '<i8')]
nbytes=60 cbytes=60 cratio=1.0
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'O'), ('POS', '<i8')]
nbytes=80 cbytes=... cratio=...
values=builtins.list>
Note that most methods will return a chunked table, using whatever
chunked storage is set as default (a zarr table, as shown below) or specified
directly via the `storage` keyword argument. E.g.::
>>> vt.copy() # doctest: +ELLIPSIS
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'S4'), ('POS', '<i8'), ('AC', ...
nbytes=220 cbytes=... cratio=...
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'O'), ('POS', '<i8'), ('AC', ...
nbytes=... cbytes=... cratio=...
values=allel.chunked.storage_zarr.ZarrTable>
>>> vt.copy(storage='bcolzmem') # doctest: +ELLIPSIS
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'S4'), ('POS', '<i8'), ('AC', ...
nbytes=220 cbytes=... cratio=...
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'O'), ('POS', '<i8'), ('AC', ...
nbytes=... cbytes=... cratio=...
values=bcolz.ctable.ctable>
>>> vt.copy(storage='hdf5mem_zlib1') # doctest: +ELLIPSIS
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'S4'), ('POS', '<i8'), ('AC', ...
nbytes=220 cbytes=... cratio=...
<VariantChunkedTable shape=(5,) dtype=[('CHROM', 'O'), ('POS', '<i8'), ('AC', ...
nbytes=... cbytes=... cratio=...
values=h5py._hl.files.File>
"""
Expand Down

0 comments on commit 56e070c

Please sign in to comment.