From 40ea349d4544fb98ab1562b5a10a335a29cc8c83 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 11 Jun 2024 22:35:54 +0200 Subject: [PATCH] create metadata module, move the metadata classes there, restore proper attributes of metadata classes --- src/zarr/api/asynchronous.py | 7 +- src/zarr/array.py | 16 +- src/zarr/codecs/sharding.py | 3 +- src/zarr/metadata/__init__.py | 19 + src/zarr/metadata/v2.py | 158 ++++++++ src/zarr/{metadata.py => metadata/v3.py} | 439 ++++++----------------- tests/v3/test_metadata.py | 12 +- 7 files changed, 303 insertions(+), 351 deletions(-) create mode 100644 src/zarr/metadata/__init__.py create mode 100644 src/zarr/metadata/v2.py rename src/zarr/{metadata.py => metadata/v3.py} (52%) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 52d07fb6f..dfdbc5b70 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -14,7 +14,8 @@ from zarr.chunk_key_encodings import ChunkKeyEncoding from zarr.common import JSON, ChunkCoords, MemoryOrder, OpenMode, ZarrFormat from zarr.group import AsyncGroup -from zarr.metadata import ArrayV2Metadata, ArrayV3Metadata +from zarr.metadata.v2 import ArrayMetadata +from zarr.metadata.v3 import ArrayMetadata from zarr.store import ( StoreLike, make_store_path, @@ -59,11 +60,11 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]: if isinstance(a, AsyncArray): new["order"] = a.order - if isinstance(a.metadata, ArrayV2Metadata): + if isinstance(a.metadata, ArrayMetadata): new["compressor"] = a.metadata.compressor new["filters"] = a.metadata.filters - if isinstance(a.metadata, ArrayV3Metadata): + if isinstance(a.metadata, ArrayMetadata): new["codecs"] = a.metadata.codecs else: raise ValueError(f"Unsupported zarr format: {a.metadata.zarr_format}") diff --git a/src/zarr/array.py b/src/zarr/array.py index 9ac1ce41e..a86be20e1 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -59,7 +59,9 @@ is_scalar, pop_fields, ) -from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata +from zarr.metadata import ArrayMetadata +from zarr.metadata.v2 import ArrayMetadata +from zarr.metadata.v3 import ArrayMetadata from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import sync @@ -69,9 +71,9 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: return data elif isinstance(data, dict): if data["zarr_format"] == 3: - return ArrayV3Metadata.from_dict(data) + return ArrayMetadata.from_dict(data) elif data["zarr_format"] == 2: - return ArrayV2Metadata.from_dict(data) + return ArrayMetadata.from_dict(data) raise TypeError @@ -231,7 +233,7 @@ async def _create_v3( else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) ) - metadata = ArrayV3Metadata( + metadata = ArrayMetadata( shape=shape, data_type=dtype, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), @@ -274,7 +276,7 @@ async def _create_v2( if dimension_separator is None: dimension_separator = "." - metadata = ArrayV2Metadata( + metadata = ArrayMetadata( shape=shape, dtype=np.dtype(dtype), chunks=chunks, @@ -349,13 +351,13 @@ async def open( zarray_dict = json.loads(zarray_bytes.to_bytes()) zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} zarray_dict["attributes"] = zattrs_dict - return cls(store_path=store_path, metadata=ArrayV2Metadata.from_dict(zarray_dict)) + return cls(store_path=store_path, metadata=ArrayMetadata.from_dict(zarray_dict)) else: # V3 arrays are comprised of a zarr.json object assert zarr_json_bytes is not None return cls( store_path=store_path, - metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), + metadata=ArrayMetadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), ) @property diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 74ad5ac44..ea2105533 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -34,7 +34,8 @@ product, ) from zarr.indexing import BasicIndexer, SelectorTuple, c_order_iter, get_indexer, morton_order_iter -from zarr.metadata import ArrayMetadata, parse_codecs +from zarr.metadata import ArrayMetadata +from zarr.metadata.v3 import parse_codecs if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator diff --git a/src/zarr/metadata/__init__.py b/src/zarr/metadata/__init__.py new file mode 100644 index 000000000..d724752a1 --- /dev/null +++ b/src/zarr/metadata/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Any + +import numpy as np + +def jsonify_dtype( + o: np.dtype[Any], +) -> str | list[tuple[str, str] | tuple[str, str, tuple[int, ...]]]: + """ + JSON serialization for a numpy dtype + """ + if isinstance(o, np.dtype): + if o.fields is None: + return o.str + else: + return o.descr + raise TypeError \ No newline at end of file diff --git a/src/zarr/metadata/v2.py b/src/zarr/metadata/v2.py new file mode 100644 index 000000000..b6977c98c --- /dev/null +++ b/src/zarr/metadata/v2.py @@ -0,0 +1,158 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from zarr.abc.metadata import Metadata +if TYPE_CHECKING: + from typing_extensions import Self + import numpy.typing as npt + from zarr.abc.codec import Codec + +from zarr.abc.codec import CodecPipeline +from zarr.array_spec import ArraySpec +from zarr.buffer import Buffer, BufferPrototype +from zarr.chunk_key_encodings import parse_separator +from zarr.codecs._v2 import V2Compressor, V2Filters +from zarr.common import JSON, ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, parse_dtype, parse_fill_value, parse_shapelike +from zarr.config import parse_indexing_order +from zarr.metadata import ArrayMetadata, jsonify_dtype + +import numpy as np +import json +from dataclasses import dataclass, field, replace +from typing import Any, Literal + +from zarr.metadata.v3 import parse_attributes + + +def parse_zarr_format(data: Any) -> Literal[2]: + if data == 2: + return 2 + raise ValueError(f"Invalid value. Expected 2. Got {data}.") + + +def parse_filters(data: Any) -> list[dict[str, JSON]] | None: + return data + + +def parse_compressor(data: Any) -> dict[str, JSON] | None: + return data + + +def parse_metadata(data: ArrayMetadata) -> ArrayMetadata: + """ + Perform validation of an entire ArrayMetadata instance, raising exceptions if there + are any problems with the metadata. Returns valid metadata. + """ + if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)): + msg = ( + f"The `shape` and `chunks` attributes must have the same length. " + f"`chunks` has length {l_chunks}, but `shape` has length {l_shape}." + ) + raise ValueError(msg) + return data + + +@dataclass(frozen=True, kw_only=True) +class ArrayMetadata(Metadata): + shape: ChunkCoords + chunks: tuple[int, ...] + data_type: np.dtype[Any] + fill_value: None | int | float = 0 + order: Literal["C", "F"] = "C" + filters: list[dict[str, JSON]] | None = None + dimension_separator: Literal[".", "/"] = "." + compressor: dict[str, JSON] | None = None + attributes: dict[str, JSON] = field(default_factory=dict) + zarr_format: Literal[2] = field(init=False, default=2) + + def __init__( + self, + *, + shape: ChunkCoords, + dtype: npt.DTypeLike, + chunks: ChunkCoords, + fill_value: Any, + order: Literal["C", "F"], + dimension_separator: Literal[".", "/"] = ".", + compressor: dict[str, JSON] | None = None, + filters: list[dict[str, JSON]] | None = None, + attributes: dict[str, JSON] | None = None, + ): + """ + Metadata for a Zarr version 2 array. + """ + shape_parsed = parse_shapelike(shape) + data_type_parsed = parse_dtype(dtype) + chunks_parsed = parse_shapelike(chunks) + compressor_parsed = parse_compressor(compressor) + order_parsed = parse_indexing_order(order) + dimension_separator_parsed = parse_separator(dimension_separator) + filters_parsed = parse_filters(filters) + fill_value_parsed = parse_fill_value(fill_value) + attributes_parsed = parse_attributes(attributes) + + object.__setattr__(self, "shape", shape_parsed) + object.__setattr__(self, "dtype", data_type_parsed) + object.__setattr__(self, "chunks", chunks_parsed) + object.__setattr__(self, "compressor", compressor_parsed) + object.__setattr__(self, "order", order_parsed) + object.__setattr__(self, "dimension_separator", dimension_separator_parsed) + object.__setattr__(self, "filters", filters_parsed) + object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "attributes", attributes_parsed) + + # ensure that the metadata document is consistent + _ = parse_metadata(self) + + @property + def ndim(self) -> int: + return len(self.shape) + + @property + def dtype(self) -> np.dtype[Any]: + return self.data_type + + @property + def codec_pipeline(self) -> CodecPipeline: + from zarr.codecs import BatchedCodecPipeline + + return BatchedCodecPipeline.from_list( + [V2Filters(self.filters or []), V2Compressor(self.compressor)] + ) + + def to_buffer_dict(self) -> dict[str, Buffer]: + zarray_dict = self.to_dict() + zattrs_dict = zarray_dict.pop("attributes", {}) + return { + ZARRAY_JSON: Buffer.from_bytes(json.dumps(zarray_dict, default=jsonify_dtype).encode()), + ZATTRS_JSON: Buffer.from_bytes(json.dumps(zattrs_dict).encode()), + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ArrayMetadata: + # check that the zarr_format attribute is correct + _ = parse_zarr_format(data.pop("zarr_format")) + return cls(**data) + + def get_chunk_spec( + self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + ) -> ArraySpec: + return ArraySpec( + shape=self.chunk_grid.chunk_shape, + dtype=self.dtype, + fill_value=self.fill_value, + order=order, + prototype=prototype, + ) + + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: + chunk_identifier = self.dimension_separator.join(map(str, chunk_coords)) + return "0" if chunk_identifier == "" else chunk_identifier + + def update_shape(self, shape: ChunkCoords) -> Self: + return replace(self, shape=shape) + + def update_attributes(self, attributes: dict[str, JSON]) -> Self: + return replace(self, attributes=attributes) + + diff --git a/src/zarr/metadata.py b/src/zarr/metadata/v3.py similarity index 52% rename from src/zarr/metadata.py rename to src/zarr/metadata/v3.py index 8329bd920..acee940e1 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata/v3.py @@ -1,172 +1,76 @@ from __future__ import annotations +from typing import TYPE_CHECKING +if TYPE_CHECKING: + import numpy.typing as npt + from typing_extensions import Self -import json -from abc import ABC, abstractmethod -from collections.abc import Iterable -from dataclasses import dataclass, field, replace -from enum import Enum -from typing import TYPE_CHECKING, Any, Literal - -import numpy as np -import numpy.typing as npt - -from zarr.abc.codec import Codec, CodecPipeline +from zarr.abc.codec import Codec from zarr.abc.metadata import Metadata +from zarr.array_spec import ArraySpec from zarr.buffer import Buffer, BufferPrototype, default_buffer_prototype from zarr.chunk_grids import ChunkGrid, RegularChunkGrid -from zarr.chunk_key_encodings import ChunkKeyEncoding, parse_separator -from zarr.codecs._v2 import V2Compressor, V2Filters +from zarr.chunk_key_encodings import ChunkKeyEncoding +from zarr.common import JSON, ZARR_JSON, ChunkCoords, parse_dtype, parse_fill_value, parse_shapelike +from zarr.metadata import ArrayMetadata, _bool -if TYPE_CHECKING: - from typing_extensions import Self import numcodecs.abc - -from zarr.array_spec import ArraySpec -from zarr.common import ( - JSON, - ZARR_JSON, - ZARRAY_JSON, - ZATTRS_JSON, - ChunkCoords, - ZarrFormat, - parse_dtype, - parse_fill_value, - parse_shapelike, -) -from zarr.config import parse_indexing_order - -# For type checking -_bool = bool - - -__all__ = ["ArrayMetadata"] +import numpy as np -class DataType(Enum): - bool = "bool" - int8 = "int8" - int16 = "int16" - int32 = "int32" - int64 = "int64" - uint8 = "uint8" - uint16 = "uint16" - uint32 = "uint32" - uint64 = "uint64" - float32 = "float32" - float64 = "float64" - - @property - def byte_count(self) -> int: - data_type_byte_counts = { - DataType.bool: 1, - DataType.int8: 1, - DataType.int16: 2, - DataType.int32: 4, - DataType.int64: 8, - DataType.uint8: 1, - DataType.uint16: 2, - DataType.uint32: 4, - DataType.uint64: 8, - DataType.float32: 4, - DataType.float64: 8, - } - return data_type_byte_counts[self] +import json +from collections.abc import Iterable +from dataclasses import dataclass, field, replace +from enum import Enum +from typing import Any, Literal - @property - def has_endianness(self) -> _bool: - # This might change in the future, e.g. for a complex with 2 8-bit floats - return self.byte_count != 1 - def to_numpy_shortname(self) -> str: - data_type_to_numpy = { - DataType.bool: "bool", - DataType.int8: "i1", - DataType.int16: "i2", - DataType.int32: "i4", - DataType.int64: "i8", - DataType.uint8: "u1", - DataType.uint16: "u2", - DataType.uint32: "u4", - DataType.uint64: "u8", - DataType.float32: "f4", - DataType.float64: "f8", - } - return data_type_to_numpy[self] - @classmethod - def from_dtype(cls, dtype: np.dtype[Any]) -> DataType: - dtype_to_data_type = { - "|b1": "bool", - "bool": "bool", - "|i1": "int8", - " Literal[3]: + if data == 3: + return 3 + raise ValueError(f"Invalid value. Expected 3. Got {data}.") -@dataclass(frozen=True, kw_only=True) -class ArrayMetadata(Metadata, ABC): - shape: ChunkCoords - fill_value: Any - chunk_grid: ChunkGrid - attributes: dict[str, JSON] - zarr_format: ZarrFormat +def parse_node_type_array(data: Literal["array"]) -> Literal["array"]: + if data == "array": + return data + raise ValueError(f"Invalid value. Expected 'array'. Got {data}.") - @property - @abstractmethod - def dtype(self) -> np.dtype[Any]: - pass - @property - @abstractmethod - def ndim(self) -> int: - pass +def parse_attributes(data: Any) -> dict[str, JSON]: + if data is None: + return {} - @property - @abstractmethod - def codec_pipeline(self) -> CodecPipeline: - pass + return data - @abstractmethod - def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype - ) -> ArraySpec: - pass - @abstractmethod - def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: - pass +def parse_dimension_names(data: Any) -> tuple[str, ...] | None: + if data is None: + return data + elif all([isinstance(x, str) for x in data]): + return tuple(data) + else: + msg = f"Expected either None or a iterable of str, got {type(data)}" + raise TypeError(msg) - @abstractmethod - def to_buffer_dict(self) -> dict[str, Buffer]: - pass - @abstractmethod - def update_shape(self, shape: ChunkCoords) -> Self: - pass +def parse_codecs(data: Iterable[Codec | JSON]) -> CodecPipeline: + from zarr.codecs import BatchedCodecPipeline - @abstractmethod - def update_attributes(self, attributes: dict[str, JSON]) -> Self: - pass + if not isinstance(data, Iterable): + raise TypeError(f"Expected iterable, got {type(data)}") + return BatchedCodecPipeline.from_dict(data) @dataclass(frozen=True, kw_only=True) -class ArrayV3Metadata(ArrayMetadata): +class ArrayMetadata(Metadata): shape: ChunkCoords data_type: np.dtype[Any] chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any - codecs: CodecPipeline + codecs: tuple[Codec, ...] attributes: dict[str, Any] = field(default_factory=dict) dimension_names: tuple[str, ...] | None = None zarr_format: Literal[3] = field(default=3, init=False) @@ -238,10 +142,6 @@ def dtype(self) -> np.dtype[Any]: def ndim(self) -> int: return len(self.shape) - @property - def codec_pipeline(self) -> CodecPipeline: - return self.codecs - def get_chunk_spec( self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype ) -> ArraySpec: @@ -277,10 +177,10 @@ def _json_convert(o: np.dtype[Any] | Enum | Codec) -> str | dict[str, Any]: } @classmethod - def from_dict(cls, data: dict[str, JSON]) -> ArrayV3Metadata: + def from_dict(cls, data: dict[str, JSON]) -> ArrayMetadata: # TODO: Remove the type: ignores[] comments below and use a TypedDict to type `data` # check that the zarr_format attribute is correct - _ = parse_zarr_format_v3(data.pop("zarr_format")) # type: ignore[arg-type] + _ = parse_zarr_format(data.pop("zarr_format")) # type: ignore[arg-type] # check that the node_type attribute is correct _ = parse_node_type_array(data.pop("node_type")) # type: ignore[arg-type] @@ -307,202 +207,71 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self: return replace(self, attributes=attributes) -@dataclass(frozen=True, kw_only=True) -class ArrayV2Metadata(ArrayMetadata): - shape: ChunkCoords - chunk_grid: RegularChunkGrid - data_type: np.dtype[Any] - fill_value: None | int | float = 0 - order: Literal["C", "F"] = "C" - filters: list[dict[str, JSON]] | None = None - dimension_separator: Literal[".", "/"] = "." - compressor: dict[str, JSON] | None = None - attributes: dict[str, JSON] = field(default_factory=dict) - zarr_format: Literal[2] = field(init=False, default=2) - - def __init__( - self, - *, - shape: ChunkCoords, - dtype: npt.DTypeLike, - chunks: ChunkCoords, - fill_value: Any, - order: Literal["C", "F"], - dimension_separator: Literal[".", "/"] = ".", - compressor: dict[str, JSON] | None = None, - filters: list[dict[str, JSON]] | None = None, - attributes: dict[str, JSON] | None = None, - ): - """ - Metadata for a Zarr version 2 array. - """ - shape_parsed = parse_shapelike(shape) - data_type_parsed = parse_dtype(dtype) - chunks_parsed = parse_shapelike(chunks) - compressor_parsed = parse_compressor(compressor) - order_parsed = parse_indexing_order(order) - dimension_separator_parsed = parse_separator(dimension_separator) - filters_parsed = parse_filters(filters) - fill_value_parsed = parse_fill_value(fill_value) - attributes_parsed = parse_attributes(attributes) - - object.__setattr__(self, "shape", shape_parsed) - object.__setattr__(self, "data_type", data_type_parsed) - object.__setattr__(self, "chunk_grid", RegularChunkGrid(chunk_shape=chunks_parsed)) - object.__setattr__(self, "compressor", compressor_parsed) - object.__setattr__(self, "order", order_parsed) - object.__setattr__(self, "dimension_separator", dimension_separator_parsed) - object.__setattr__(self, "filters", filters_parsed) - object.__setattr__(self, "fill_value", fill_value_parsed) - object.__setattr__(self, "attributes", attributes_parsed) - - # ensure that the metadata document is consistent - _ = parse_v2_metadata(self) - - @property - def ndim(self) -> int: - return len(self.shape) - - @property - def dtype(self) -> np.dtype[Any]: - return self.data_type +class DataType(Enum): + bool = "bool" + int8 = "int8" + int16 = "int16" + int32 = "int32" + int64 = "int64" + uint8 = "uint8" + uint16 = "uint16" + uint32 = "uint32" + uint64 = "uint64" + float32 = "float32" + float64 = "float64" @property - def chunks(self) -> ChunkCoords: - return self.chunk_grid.chunk_shape + def byte_count(self) -> int: + data_type_byte_counts = { + DataType.bool: 1, + DataType.int8: 1, + DataType.int16: 2, + DataType.int32: 4, + DataType.int64: 8, + DataType.uint8: 1, + DataType.uint16: 2, + DataType.uint32: 4, + DataType.uint64: 8, + DataType.float32: 4, + DataType.float64: 8, + } + return data_type_byte_counts[self] @property - def codec_pipeline(self) -> CodecPipeline: - from zarr.codecs import BatchedCodecPipeline - - return BatchedCodecPipeline.from_list( - [V2Filters(self.filters or []), V2Compressor(self.compressor)] - ) - - def to_buffer_dict(self) -> dict[str, Buffer]: - def _json_convert( - o: np.dtype[Any], - ) -> str | list[tuple[str, str] | tuple[str, str, tuple[int, ...]]]: - if isinstance(o, np.dtype): - if o.fields is None: - return o.str - else: - return o.descr - raise TypeError + def has_endianness(self) -> _bool: + # This might change in the future, e.g. for a complex with 2 8-bit floats + return self.byte_count != 1 - zarray_dict = self.to_dict() - assert isinstance(zarray_dict, dict) - zattrs_dict = zarray_dict.pop("attributes", {}) - assert isinstance(zattrs_dict, dict) - return { - ZARRAY_JSON: Buffer.from_bytes(json.dumps(zarray_dict, default=_json_convert).encode()), - ZATTRS_JSON: Buffer.from_bytes(json.dumps(zattrs_dict).encode()), + def to_numpy_shortname(self) -> str: + data_type_to_numpy = { + DataType.bool: "bool", + DataType.int8: "i1", + DataType.int16: "i2", + DataType.int32: "i4", + DataType.int64: "i8", + DataType.uint8: "u1", + DataType.uint16: "u2", + DataType.uint32: "u4", + DataType.uint64: "u8", + DataType.float32: "f4", + DataType.float64: "f8", } + return data_type_to_numpy[self] @classmethod - def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: - # check that the zarr_format attribute is correct - _ = parse_zarr_format_v2(data.pop("zarr_format")) - return cls(**data) - - def to_dict(self) -> JSON: - zarray_dict = super().to_dict() - - assert isinstance(zarray_dict, dict) - - _ = zarray_dict.pop("chunk_grid") - zarray_dict["chunks"] = self.chunk_grid.chunk_shape - - _ = zarray_dict.pop("data_type") - zarray_dict["dtype"] = self.data_type.str - - return zarray_dict - - def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype - ) -> ArraySpec: - return ArraySpec( - shape=self.chunk_grid.chunk_shape, - dtype=self.dtype, - fill_value=self.fill_value, - order=order, - prototype=prototype, - ) - - def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: - chunk_identifier = self.dimension_separator.join(map(str, chunk_coords)) - return "0" if chunk_identifier == "" else chunk_identifier - - def update_shape(self, shape: ChunkCoords) -> Self: - return replace(self, shape=shape) - - def update_attributes(self, attributes: dict[str, JSON]) -> Self: - return replace(self, attributes=attributes) - - -def parse_dimension_names(data: None | Iterable[str]) -> tuple[str, ...] | None: - if data is None: - return data - elif all([isinstance(x, str) for x in data]): - return tuple(data) - else: - msg = f"Expected either None or a iterable of str, got {type(data)}" - raise TypeError(msg) - - -# todo: real validation -def parse_attributes(data: None | dict[str, JSON]) -> dict[str, JSON]: - if data is None: - return {} - - return data - - -# todo: move to its own module and drop _v3 suffix -# todo: consider folding all the literal parsing into a single function -# that takes 2 arguments -def parse_zarr_format_v3(data: Literal[3]) -> Literal[3]: - if data == 3: - return data - raise ValueError(f"Invalid value. Expected 3. Got {data}.") - - -# todo: move to its own module and drop _v2 suffix -def parse_zarr_format_v2(data: Literal[2]) -> Literal[2]: - if data == 2: - return data - raise ValueError(f"Invalid value. Expected 2. Got {data}.") - - -def parse_node_type_array(data: Literal["array"]) -> Literal["array"]: - if data == "array": - return data - raise ValueError(f"Invalid value. Expected 'array'. Got {data}.") - - -# todo: real validation -def parse_filters(data: list[dict[str, JSON]] | None) -> list[dict[str, JSON]] | None: - return data - - -# todo: real validation -def parse_compressor(data: dict[str, JSON] | None) -> dict[str, JSON] | None: - return data - - -def parse_v2_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: - if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)): - msg = ( - f"The `shape` and `chunks` attributes must have the same length. " - f"`chunks` has length {l_chunks}, but `shape` has length {l_shape}." - ) - raise ValueError(msg) - return data - - -def parse_codecs(data: Iterable[Codec | JSON]) -> CodecPipeline: - from zarr.codecs import BatchedCodecPipeline - - if not isinstance(data, Iterable): - raise TypeError(f"Expected iterable, got {type(data)}") - return BatchedCodecPipeline.from_dict(data) + def from_dtype(cls, dtype: np.dtype[Any]) -> DataType: + dtype_to_data_type = { + "|b1": "bool", + "bool": "bool", + "|i1": "int8", + " None: ... def test_parse_zarr_format_v3_valid() -> None: - assert parse_zarr_format_v3(3) == 3 + assert parse_zarr_format(3) == 3 @pytest.mark.parametrize("data", [None, 1, 2, 4, 5, "3"]) def test_parse_zarr_foramt_v3_invalid(data: Any) -> None: with pytest.raises(ValueError, match=f"Invalid value. Expected 3. Got {data}"): - parse_zarr_format_v3(data) + parse_zarr_format(data) def test_parse_zarr_format_v2_valid() -> None: - assert parse_zarr_format_v2(2) == 2 + assert parse_zarr_format(2) == 2 @pytest.mark.parametrize("data", [None, 1, 3, 4, 5, "3"]) def test_parse_zarr_foramt_v2_invalid(data: Any) -> None: with pytest.raises(ValueError, match=f"Invalid value. Expected 2. Got {data}"): - parse_zarr_format_v2(data) + parse_zarr_format(data)