From c2e03c3bd66bf4038c15e5c22d8a0c52abe16cc2 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Wed, 22 May 2024 13:59:21 +0100 Subject: [PATCH 1/2] Add numpy to mypy pre-commit check env (#1893) * Add numpy to mypy pre-commit check env * fixes for zstd * Ignore errors in zarr.buffer --------- Co-authored-by: Norman Rzepka --- .pre-commit-config.yaml | 2 +- pyproject.toml | 1 + src/zarr/array.py | 4 ++-- src/zarr/buffer.py | 27 ++++++++++++++------------- src/zarr/codecs/bytes.py | 4 +++- src/zarr/codecs/sharding.py | 5 +++-- src/zarr/codecs/zstd.py | 7 ++++--- src/zarr/store/local.py | 4 ++-- 8 files changed, 30 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 70812439c..ffa3c94ef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,8 +26,8 @@ repos: hooks: - id: mypy files: src - args: [] additional_dependencies: - types-redis - types-setuptools - pytest + - numpy diff --git a/pyproject.toml b/pyproject.toml index 62a81144b..947bec936 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -230,6 +230,7 @@ module = [ "zarr.v2.*", "zarr.array_v2", "zarr.array", + "zarr.buffer" ] disallow_untyped_calls = false diff --git a/src/zarr/array.py b/src/zarr/array.py index 86ff26294..2828e2511 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -419,7 +419,7 @@ async def setitem( # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass # Buffer and NDBuffer between components. - value = factory(value) + value_buffer = factory(value) # merging with existing data and encoding chunks await self.metadata.codec_pipeline.write( @@ -432,7 +432,7 @@ async def setitem( ) for chunk_coords, chunk_selection, out_selection in indexer ], - value, + value_buffer, ) async def resize( diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 84bf6b0bb..e9aa1120f 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -11,6 +11,7 @@ ) import numpy as np +import numpy.typing as npt if TYPE_CHECKING: from typing_extensions import Self @@ -20,8 +21,8 @@ # TODO: create a protocol for the attributes we need, for now we alias Numpy's ndarray # both for the array-like and ndarray-like -ArrayLike: TypeAlias = np.ndarray -NDArrayLike: TypeAlias = np.ndarray +ArrayLike: TypeAlias = npt.NDArray[Any] +NDArrayLike: TypeAlias = npt.NDArray[Any] def check_item_key_is_1d_contiguous(key: Any) -> None: @@ -40,7 +41,7 @@ def __call__( self, *, shape: Iterable[int], - dtype: np.DTypeLike, + dtype: npt.DTypeLike, order: Literal["C", "F"], fill_value: Any | None, ) -> NDBuffer: @@ -163,7 +164,7 @@ def as_array_like(self) -> NDArrayLike: """ return self._data - def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: + def as_nd_buffer(self, *, dtype: npt.DTypeLike) -> NDBuffer: """Create a new NDBuffer from this one. This will never copy data. @@ -179,7 +180,7 @@ def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: """ return NDBuffer.from_ndarray_like(self._data.view(dtype=dtype)) - def as_numpy_array(self) -> np.ndarray: + def as_numpy_array(self) -> npt.NDArray[Any]: """Return the buffer as a NumPy array (host memory). Warning @@ -271,7 +272,7 @@ def create( cls, *, shape: Iterable[int], - dtype: np.DTypeLike, + dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: @@ -298,7 +299,7 @@ def create( A subclass can overwrite this method to create a ndarray-like object other then the default Numpy array. """ - ret = cls(np.empty(shape=shape, dtype=dtype, order=order)) + ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order)) if fill_value is not None: ret.fill(fill_value) return ret @@ -319,7 +320,7 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: return cls(ndarray_like) @classmethod - def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: + def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: """Create a new buffer of Numpy array-like object Parameters @@ -360,7 +361,7 @@ def as_buffer(self) -> Buffer: data = np.ascontiguousarray(self._data) return Buffer(data.reshape(-1).view(dtype="b")) # Flatten the array without copy - def as_numpy_array(self) -> np.ndarray: + def as_numpy_array(self) -> npt.NDArray[Any]: """Return the buffer as a NumPy array (host memory). Warning @@ -393,9 +394,9 @@ def byteorder(self) -> Endian: return Endian(sys.byteorder) def reshape(self, newshape: Iterable[int]) -> Self: - return self.__class__(self._data.reshape(newshape)) + return self.__class__(self._data.reshape(tuple(newshape))) - def astype(self, dtype: np.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: + def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: return self.__class__(self._data.astype(dtype=dtype, order=order)) def __getitem__(self, key: Any) -> Self: @@ -418,11 +419,11 @@ def fill(self, value: Any) -> None: def copy(self) -> Self: return self.__class__(self._data.copy()) - def transpose(self, *axes: np.SupportsIndex) -> Self: + def transpose(self, *axes: np.SupportsIndex) -> Self: # type: ignore[name-defined] return self.__class__(self._data.transpose(*axes)) -def as_numpy_array_wrapper(func: Callable[[np.ndarray], bytes], buf: Buffer) -> Buffer: +def as_numpy_array_wrapper(func: Callable[[npt.NDArray[Any]], bytes], buf: Buffer) -> Buffer: """Converts the input of `func` to a numpy array and the output back to `Buffer`. This function is useful when calling a `func` that only support host memory such diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 258115769..aebaf94e7 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -92,7 +92,9 @@ async def _encode_single( assert isinstance(chunk_array, NDBuffer) if chunk_array.dtype.itemsize > 1: if self.endian is not None and self.endian != chunk_array.byteorder: - new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) + # type-ignore is a numpy bug + # see https://github.com/numpy/numpy/issues/26473 + new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) # type: ignore[arg-type] chunk_array = chunk_array.astype(new_dtype) return chunk_array.as_buffer() diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 11035f1f2..a68577be6 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, NamedTuple import numpy as np +import numpy.typing as npt from zarr.abc.codec import ( ArrayBytesCodec, @@ -85,7 +86,7 @@ async def delete(self) -> None: class _ShardIndex(NamedTuple): # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2) - offsets_and_lengths: np.ndarray + offsets_and_lengths: npt.NDArray[np.uint64] @property def chunks_per_shard(self) -> ChunkCoords: @@ -100,7 +101,7 @@ def _localize_chunk(self, chunk_coords: ChunkCoords) -> ChunkCoords: def is_all_empty(self) -> bool: return bool(np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)) - def get_full_chunk_map(self) -> np.ndarray: + def get_full_chunk_map(self) -> npt.NDArray[np.bool_]: return self.offsets_and_lengths[..., 0] != MAX_UINT_64 def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None: diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index d53199d0a..76e625ad6 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -1,8 +1,9 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any +import numpy.typing as npt from zstandard import ZstdCompressor, ZstdDecompressor from zarr.abc.codec import BytesBytesCodec @@ -52,11 +53,11 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: def to_dict(self) -> dict[str, JSON]: return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}} - def _compress(self, data: bytes) -> bytes: + def _compress(self, data: npt.NDArray[Any]) -> bytes: ctx = ZstdCompressor(level=self.level, write_checksum=self.checksum) return ctx.compress(data) - def _decompress(self, data: bytes) -> bytes: + def _decompress(self, data: npt.NDArray[Any]) -> bytes: ctx = ZstdDecompressor() return ctx.decompress(data) diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index 60d0022f9..64eb8632b 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -58,10 +58,10 @@ def _put( if start is not None: with path.open("r+b") as f: f.seek(start) - f.write(value.as_numpy_array()) + f.write(value.as_numpy_array().tobytes()) return None else: - return path.write_bytes(value.as_numpy_array()) + return path.write_bytes(value.as_numpy_array().tobytes()) class LocalStore(Store): From 4da9505ce657513b7a092bae25407ebf0e476775 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 22 May 2024 16:17:45 +0200 Subject: [PATCH 2/2] remove fixture files from src (#1897) --- src/zarr/fixture/.zgroup | 3 --- src/zarr/fixture/flat/.zarray | 23 ----------------------- src/zarr/fixture/flat/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/flat_legacy/.zarray | 22 ---------------------- src/zarr/fixture/flat_legacy/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/meta/.zarray | 23 ----------------------- src/zarr/fixture/meta/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/nested/.zarray | 23 ----------------------- src/zarr/fixture/nested/0/0 | Bin 48 -> 0 bytes src/zarr/fixture/nested_legacy/.zarray | 23 ----------------------- src/zarr/fixture/nested_legacy/0/0 | Bin 48 -> 0 bytes 11 files changed, 117 deletions(-) delete mode 100644 src/zarr/fixture/.zgroup delete mode 100644 src/zarr/fixture/flat/.zarray delete mode 100644 src/zarr/fixture/flat/0.0 delete mode 100644 src/zarr/fixture/flat_legacy/.zarray delete mode 100644 src/zarr/fixture/flat_legacy/0.0 delete mode 100644 src/zarr/fixture/meta/.zarray delete mode 100644 src/zarr/fixture/meta/0.0 delete mode 100644 src/zarr/fixture/nested/.zarray delete mode 100644 src/zarr/fixture/nested/0/0 delete mode 100644 src/zarr/fixture/nested_legacy/.zarray delete mode 100644 src/zarr/fixture/nested_legacy/0/0 diff --git a/src/zarr/fixture/.zgroup b/src/zarr/fixture/.zgroup deleted file mode 100644 index 3b7daf227..000000000 --- a/src/zarr/fixture/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format": 2 -} \ No newline at end of file diff --git a/src/zarr/fixture/flat/.zarray b/src/zarr/fixture/flat/.zarray deleted file mode 100644 index d1acce766..000000000 --- a/src/zarr/fixture/flat/.zarray +++ /dev/null @@ -1,23 +0,0 @@ -{ - "chunks": [ - 2, - 2 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dimension_separator": ".", - "dtype": "