Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add name kwarg to from_zarr (#1) #4663

Merged
merged 6 commits into from Apr 18, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -26,7 +26,7 @@ copy NUL %CONDA_PREFIX%\conda-meta\pinned
@rem Install optional dependencies for tests
%CONDA_INSTALL% numpy pandas cloudpickle distributed
%CONDA_INSTALL% bcolz bokeh h5py ipython lz4 psutil pytables s3fs scipy
%CONDA_INSTALL% -c conda-forge fastparquet snappy
%CONDA_INSTALL% -c conda-forge fastparquet snappy zarr
mpeaton marked this conversation as resolved.
Show resolved Hide resolved
mpeaton marked this conversation as resolved.
Show resolved Hide resolved

%PIP_INSTALL% --no-deps --upgrade locket git+https://github.com/dask/partd
%PIP_INSTALL% --no-deps --upgrade heapdict git+https://github.com/dask/cachey
Expand Down
8 changes: 6 additions & 2 deletions dask/array/core.py
Expand Up @@ -2149,7 +2149,7 @@ def from_array(x, chunks, name=None, lock=False, asarray=True, fancy=True,
return Array(dsk, name, chunks, dtype=x.dtype)


def from_zarr(url, component=None, storage_options=None, chunks=None, **kwargs):
def from_zarr(url, component=None, storage_options=None, chunks=None,name=None, **kwargs):
"""Load array from the zarr storage format

See https://zarr.readthedocs.io for details about the format.
Expand All @@ -2170,6 +2170,8 @@ def from_zarr(url, component=None, storage_options=None, chunks=None, **kwargs):
Passed to ``da.from_array``, allows setting the chunks on
initialisation, if the chunking scheme in the on-disc dataset is not
optimal for the calculations to follow.
name : str, optional
An optional keyname for the array. Defaults to hashing the input.
mpeaton marked this conversation as resolved.
Show resolved Hide resolved
kwargs: passed to ``zarr.Array``.
"""
import zarr
Expand All @@ -2186,7 +2188,9 @@ def from_zarr(url, component=None, storage_options=None, chunks=None, **kwargs):
mapper = url
z = zarr.Array(mapper, read_only=True, path=component, **kwargs)
chunks = chunks if chunks is not None else z.chunks
return from_array(z, chunks, name='zarr-%s' % url)
if name is None:
name = 'from-zarr-' + tokenize(z, component, storage_options, chunks, **kwargs)
return from_array(z, chunks, name=name )


def to_zarr(arr, url, component=None, storage_options=None,
Expand Down
14 changes: 14 additions & 0 deletions dask/array/tests/test_array_core.py
Expand Up @@ -3437,6 +3437,20 @@ def test_normalize_chunks_nan():
assert "auto" in str(info.value)


def test_from_zarr_unique_name():
    """Distinct zarr arrays loaded without an explicit name get distinct names."""
    zarr = pytest.importorskip('zarr')
    first = zarr.array([1, 2, 3])
    second = zarr.array([4, 5, 6])

    # Load both arrays in order and compare the names dask assigned to them.
    first_name, second_name = [da.from_zarr(src).name for src in (first, second)]
    assert first_name != second_name


def test_from_zarr_name():
    """An explicit ``name`` passed to ``da.from_zarr`` is used as the array name."""
    zarr = pytest.importorskip('zarr')
    source = zarr.array([1, 2, 3])
    loaded = da.from_zarr(source, name='foo')
    assert loaded.name == 'foo'


def test_zarr_roundtrip():
pytest.importorskip('zarr')
with tmpdir() as d:
Expand Down