
Commit

Merge branch 'v3' into store_tests
d-v-b committed May 22, 2024
2 parents bc31613 + b1f4c50 commit 07fc249
Showing 9 changed files with 145 additions and 77 deletions.
4 changes: 2 additions & 2 deletions src/zarr/array.py
@@ -582,12 +582,12 @@ def store_path(self) -> StorePath:
def order(self) -> Literal["C", "F"]:
return self._async_array.order

def __getitem__(self, selection: Selection) -> npt.NDArray[Any]:
def __getitem__(self, selection: Selection) -> NDArrayLike:
return sync(
self._async_array.getitem(selection),
)

def __setitem__(self, selection: Selection, value: npt.NDArray[Any]) -> None:
def __setitem__(self, selection: Selection, value: NDArrayLike) -> None:
sync(
self._async_array.setitem(selection, value),
)
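The only substantive change in array.py is the annotation: `__getitem__` now returns, and `__setitem__` now accepts, the `NDArrayLike` protocol from `zarr.buffer` rather than a concrete `npt.NDArray[Any]`. A minimal sketch of downstream code typed against the protocol (the function name and dtype are illustrative, not part of this commit):

```python
from __future__ import annotations

import numpy as np

from zarr.buffer import NDArrayLike


def as_flat_float32(data: NDArrayLike) -> NDArrayLike:
    # ravel() and astype() are both part of the NDArrayLike protocol, so this
    # accepts a NumPy array today and any other nd-array-like backend that
    # satisfies the protocol (e.g. CuPy).
    return data.ravel().astype(np.float32)


# With the updated signature, a (hypothetical) zarr array `arr` could be used
# as: as_flat_float32(arr[:])
```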
135 changes: 80 additions & 55 deletions src/zarr/buffer.py
@@ -1,28 +1,94 @@
from __future__ import annotations

import sys
from collections.abc import Callable, Iterable
from collections.abc import Callable, Iterable, Sequence
from typing import (
TYPE_CHECKING,
Any,
Literal,
Protocol,
TypeAlias,
SupportsIndex,
runtime_checkable,
)

import numpy as np
import numpy.typing as npt

from zarr.common import ChunkCoords

if TYPE_CHECKING:
from typing_extensions import Self

from zarr.codecs.bytes import Endian
from zarr.common import BytesLike

# TODO: create a protocol for the attributes we need, for now we alias Numpy's ndarray
# both for the array-like and ndarray-like
ArrayLike: TypeAlias = npt.NDArray[Any]
NDArrayLike: TypeAlias = npt.NDArray[Any]

@runtime_checkable
class ArrayLike(Protocol):
"""Protocol for the array-like type that underlie Buffer"""

@property
def dtype(self) -> np.dtype[Any]: ...

@property
def ndim(self) -> int: ...

@property
def size(self) -> int: ...

def __getitem__(self, key: slice) -> Self: ...

def __setitem__(self, key: slice, value: Any) -> None: ...


@runtime_checkable
class NDArrayLike(Protocol):
"""Protocol for the nd-array-like type that underlie NDBuffer"""

@property
def dtype(self) -> np.dtype[Any]: ...

@property
def ndim(self) -> int: ...

@property
def size(self) -> int: ...

@property
def shape(self) -> ChunkCoords: ...

def __len__(self) -> int: ...

def __getitem__(self, key: slice) -> Self: ...

def __setitem__(self, key: slice, value: Any) -> None: ...

def reshape(self, shape: ChunkCoords, *, order: Literal["A", "C", "F"] = ...) -> Self: ...

def view(self, dtype: npt.DTypeLike) -> Self: ...

def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = ...) -> Self: ...

def fill(self, value: Any) -> None: ...

def copy(self) -> Self: ...

def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self: ...

def ravel(self, order: Literal["K", "A", "C", "F"] = "C") -> Self: ...

def all(self) -> bool: ...

def __eq__(self, other: Any) -> Self: # type: ignore
"""Element-wise equal
Notice
------
Type checkers such as mypy complain because the return type isn't a bool like
its supertype "object", which violates the Liskov substitution principle.
This is true, but since NumPy's ndarray defines __eq__ as an element-wise
comparison, our hands are tied.
"""


def check_item_key_is_1d_contiguous(key: Any) -> None:
@@ -124,7 +190,7 @@ def create_zero_length(cls) -> Self:
return cls(np.array([], dtype="b"))

@classmethod
def from_array_like(cls, array_like: NDArrayLike) -> Self:
def from_array_like(cls, array_like: ArrayLike) -> Self:
"""Create a new buffer of a array-like object
Parameters
@@ -153,7 +219,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
"""
return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))

def as_array_like(self) -> NDArrayLike:
def as_array_like(self) -> ArrayLike:
"""Return the underlying array (host or device memory) of this buffer
This will never copy data.
@@ -164,22 +230,6 @@ def as_array_like(self) -> NDArrayLike:
"""
return self._data

def as_nd_buffer(self, *, dtype: npt.DTypeLike) -> NDBuffer:
"""Create a new NDBuffer from this one.
This will never copy data.
Parameters
----------
dtype
The datatype of the returned buffer (reinterpretation of the bytes)
Return
------
New NDBuffer representing `self.as_array_like()`
"""
return NDBuffer.from_ndarray_like(self._data.view(dtype=dtype))

def as_numpy_array(self) -> npt.NDArray[Any]:
"""Return the buffer as a NumPy array (host memory).
@@ -223,17 +273,8 @@ def __add__(self, other: Buffer) -> Self:

other_array = other.as_array_like()
assert other_array.dtype == np.dtype("b")
return self.__class__(np.concatenate((self._data, other_array)))

def __eq__(self, other: Any) -> bool:
if isinstance(other, bytes | bytearray):
# Many of the tests compares `Buffer` with `bytes` so we
# convert the bytes to a Buffer and try again
return self == self.from_bytes(other)
if isinstance(other, Buffer):
return (self._data == other.as_array_like()).all()
raise ValueError(
f"equal operator not supported between {self.__class__} and {other.__class__}"
return self.__class__(
np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
)


@@ -345,22 +386,6 @@ def as_ndarray_like(self) -> NDArrayLike:
"""
return self._data

def as_buffer(self) -> Buffer:
"""Create a new Buffer from this one.
Warning
-------
Copies data if the buffer is non-contiguous.
Return
------
The new buffer (might be data copy)
"""
data = self._data
if not self._data.flags.contiguous:
data = np.ascontiguousarray(self._data)
return Buffer(data.reshape(-1).view(dtype="b")) # Flatten the array without copy

def as_numpy_array(self) -> npt.NDArray[Any]:
"""Return the buffer as a NumPy array (host memory).
@@ -393,8 +418,8 @@ def byteorder(self) -> Endian:
else:
return Endian(sys.byteorder)

def reshape(self, newshape: Iterable[int]) -> Self:
return self.__class__(self._data.reshape(tuple(newshape)))
def reshape(self, newshape: ChunkCoords) -> Self:
return self.__class__(self._data.reshape(newshape))

def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self:
return self.__class__(self._data.astype(dtype=dtype, order=order))
@@ -419,8 +444,8 @@ def fill(self, value: Any) -> None:
def copy(self) -> Self:
return self.__class__(self._data.copy())

def transpose(self, *axes: np.SupportsIndex) -> Self: # type: ignore[name-defined]
return self.__class__(self._data.transpose(*axes))
def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self:
return self.__class__(self._data.transpose(axes))


def as_numpy_array_wrapper(func: Callable[[npt.NDArray[Any]], bytes], buf: Buffer) -> Buffer:
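Because `ArrayLike` and `NDArrayLike` are declared `@runtime_checkable`, a plain NumPy array passes `isinstance` checks against both, which is what the new `test_nd_array_like` test further down relies on. A small sketch connecting the protocols to the `Buffer`/`NDBuffer` constructors kept in this file; note that `runtime_checkable` only verifies that the member names exist, not their signatures:

```python
import numpy as np

from zarr.buffer import ArrayLike, Buffer, NDArrayLike, NDBuffer

# A NumPy ndarray satisfies both protocols at runtime.
ary = np.arange(10)
assert isinstance(ary, ArrayLike)
assert isinstance(ary, NDArrayLike)

# Buffer / NDBuffer wrap such objects without copying.
buf = Buffer.from_bytes(b"zarr")
assert isinstance(buf.as_array_like(), ArrayLike)

ndbuf = NDBuffer.from_ndarray_like(np.zeros((2, 3), dtype="uint16"))
assert isinstance(ndbuf.as_ndarray_like(), NDArrayLike)
```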
16 changes: 13 additions & 3 deletions src/zarr/codecs/bytes.py
@@ -8,7 +8,7 @@
import numpy as np

from zarr.abc.codec import ArrayBytesCodec
from zarr.buffer import Buffer, NDBuffer
from zarr.buffer import Buffer, NDArrayLike, NDBuffer
from zarr.codecs.registry import register_codec
from zarr.common import parse_enum, parse_named_configuration

@@ -75,7 +75,13 @@ async def _decode_single(
dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}")
else:
dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}")
chunk_array = chunk_bytes.as_nd_buffer(dtype=dtype)

as_array_like = chunk_bytes.as_array_like()
if isinstance(as_array_like, NDArrayLike):
as_nd_array_like = as_array_like
else:
as_nd_array_like = np.asanyarray(as_array_like)
chunk_array = NDBuffer.from_ndarray_like(as_nd_array_like.view(dtype=dtype))

# ensure correct chunk shape
if chunk_array.shape != chunk_spec.shape:
@@ -96,7 +102,11 @@ async def _encode_single(
# see https://github.com/numpy/numpy/issues/26473
new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) # type: ignore[arg-type]
chunk_array = chunk_array.astype(new_dtype)
return chunk_array.as_buffer()

as_nd_array_like = chunk_array.as_ndarray_like()
# Flatten the nd-array (only copy if needed)
as_nd_array_like = as_nd_array_like.ravel().view(dtype="b")
return Buffer.from_array_like(as_nd_array_like)

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
return input_byte_length
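Taken together, the decode and encode paths now spell out the `Buffer` to `NDBuffer` conversions that the removed `as_nd_buffer` and `as_buffer` helpers used to hide. A condensed sketch of the same round trip outside the codec (the `<u2` dtype is arbitrary and chosen only for illustration):

```python
import numpy as np

from zarr.buffer import Buffer, NDBuffer

# Decode direction: raw bytes -> typed nd-buffer (a zero-copy view).
chunk_bytes = Buffer.from_bytes(np.arange(4, dtype="<u2").tobytes())
as_nd_array_like = np.asanyarray(chunk_bytes.as_array_like())
chunk_array = NDBuffer.from_ndarray_like(as_nd_array_like.view(dtype="<u2"))

# Encode direction: nd-buffer -> flat byte buffer. ravel() only copies when
# the underlying array is non-contiguous, matching the comment in
# _encode_single above.
flat = chunk_array.as_ndarray_like().ravel().view(dtype="b")
encoded = Buffer.from_array_like(flat)
```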
1 change: 1 addition & 0 deletions src/zarr/testing/store.py
@@ -4,6 +4,7 @@

from zarr.abc.store import Store
from zarr.buffer import Buffer
from zarr.testing.utils import assert_bytes_equal


def _normalize_byte_range(
18 changes: 18 additions & 0 deletions src/zarr/testing/utils.py
@@ -0,0 +1,18 @@
from __future__ import annotations

from zarr.buffer import Buffer
from zarr.common import BytesLike


def assert_bytes_equal(b1: Buffer | BytesLike | None, b2: Buffer | BytesLike | None) -> None:
"""Help function to assert if two bytes-like or Buffers are equal
Warning
-------
Always copies data, only use for testing and debugging
"""
if isinstance(b1, Buffer):
b1 = b1.to_bytes()
if isinstance(b2, Buffer):
b2 = b2.to_bytes()
assert b1 == b2
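
A usage sketch of the new helper; comparing a `Buffer` against raw bytes is exactly the case it exists to simplify in the shared store tests:

```python
from zarr.buffer import Buffer
from zarr.testing.utils import assert_bytes_equal

# Buffer vs. bytes and bytes vs. bytes both work; Buffer arguments are first
# converted with to_bytes(), so this copies data (test/debug use only).
assert_bytes_equal(Buffer.from_bytes(b"\x00\x01"), b"\x00\x01")
assert_bytes_equal(b"abc", b"abc")
```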
9 changes: 9 additions & 0 deletions tests/v3/conftest.py
@@ -1,5 +1,7 @@
from __future__ import annotations

from collections.abc import Iterator
from types import ModuleType
from typing import TYPE_CHECKING

from zarr.common import ZarrFormat
@@ -81,3 +83,10 @@ async def async_group(request: pytest.FixtureRequest, tmpdir) -> AsyncGroup:
exists_ok=False,
)
return agroup


@pytest.fixture(params=["numpy", "cupy"])
def xp(request: pytest.FixtureRequest) -> Iterator[ModuleType]:
"""Fixture to parametrize over numpy-like libraries"""

yield pytest.importorskip(request.param)
15 changes: 9 additions & 6 deletions tests/v3/test_buffer.py
@@ -8,9 +8,7 @@
import pytest

from zarr.array import AsyncArray
from zarr.buffer import NDBuffer
from zarr.store.core import StorePath
from zarr.store.memory import MemoryStore
from zarr.buffer import ArrayLike, NDArrayLike, NDBuffer

if TYPE_CHECKING:
from typing_extensions import Self
@@ -41,12 +39,17 @@ def create(
return ret


def test_nd_array_like(xp):
ary = xp.arange(10)
assert isinstance(ary, ArrayLike)
assert isinstance(ary, NDArrayLike)


@pytest.mark.asyncio
async def test_async_array_factory():
store = StorePath(MemoryStore())
async def test_async_array_factory(store_path):
expect = np.zeros((9, 9), dtype="uint16", order="F")
a = await AsyncArray.create(
store / "test_async_array",
store_path,
shape=expect.shape,
chunk_shape=(5, 5),
dtype=expect.dtype,