Skip to content

Commit

Permalink
ADD: VLType and CompoundType, align with nc-complex, add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kmuehlbauer committed Jan 25, 2024
1 parent b19d4a0 commit 6dc11d3
Show file tree
Hide file tree
Showing 3 changed files with 262 additions and 18 deletions.
113 changes: 101 additions & 12 deletions h5netcdf/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def dtype(self):


class UserType(BaseObject):
_cls_name = "h5netcdf.UserType"

@property
def name(self):
"""Return enum name."""
Expand All @@ -162,6 +164,18 @@ def __repr__(self):
return super().__repr__() + f", fields / values = {self.enum_dict!r}"


class VLType(UserType):
    """User type wrapper for variable-length (VL) data types."""

    _cls_name = "h5netcdf.VLType"


class CompoundType(UserType):
    """User type wrapper for compound (structured) data types."""

    _cls_name = "h5netcdf.CompoundType"

    @property
    def dtype_view(self):
        """Return the dtype used to view data of this compound type.

        The previous body returned ``self.dtype_view``, so the property
        called itself and every access ended in ``RecursionError``.

        NOTE(review): returning the plain ``dtype`` is the minimal
        non-recursive behaviour. Confirm whether a converted view dtype
        (e.g. for complex data) was intended here.
        """
        return self.dtype


class BaseVariable(BaseObject):
def __init__(self, parent, name, dimensions=None):
super().__init__(parent, name)
Expand Down Expand Up @@ -567,7 +581,7 @@ def _check_dtype(self, dtype):

# if a user type is given, extract the underlying h5py object
# we just use the h5py user type here
if isinstance(dtype, (EnumType,)):
if isinstance(dtype, (EnumType, VLType, CompoundType)):
h5type = dtype._h5ds
if dtype._root._h5file.filename != self._root._h5file.filename:
raise TypeError(
Expand All @@ -578,14 +592,31 @@ def _check_dtype(self, dtype):
# check if committed type can be accessed in current group hierarchy
dname = dtype.name.split("/")[-1]
if (
(user_type := self._all_enumtypes.get(dname)) is None
) or user_type._h5ds.name != h5type.name:
(user_type := self._all_usertypes.get(dname)) is None
) or self._root._h5file[user_type].name != h5type.name:
msg = (
f"Given dtype {dtype.name!r} is not accessible in current group"
f" {self._h5group.name!r} or any parent group. Instead it's defined at"
f" {h5type.name!r}. Please create it in the current or any parent group."
)
raise TypeError(msg)
elif np.dtype(dtype).kind == "c":
itemsize = np.dtype(dtype).itemsize
try:
width = {8: "FLOAT", 16: "DOUBLE"}[itemsize]
except KeyError as e:
raise TypeError(
"Currently only 'complex64' and 'complex128' dtypes are allowed."
) from e
dname = f"_PFNC_{width}_COMPLEX_TYPE"
# todo: check compound types for existing complex types
# which may be used here
# if dname is not available in current group-path
# create and commit type in current group
if dname not in self._all_cmptypes:
dtype = self.create_cmptype(dtype, dname).dtype
# get committed type from file
# return self._all_cmptypes[dname]._h5ds

return dtype

Expand Down Expand Up @@ -674,6 +705,8 @@ class Group(Mapping):
_variable_cls = Variable
_dimension_cls = Dimension
_enumtype_cls = EnumType
_vltype_cls = VLType
_cmptype_cls = CompoundType

@property
def _group_cls(self):
Expand All @@ -691,15 +724,25 @@ def __init__(self, parent, name):

self._dimensions = Dimensions(self)
self._enumtypes = _LazyObjectLookup(self, self._enumtype_cls)
self._vltypes = _LazyObjectLookup(self, self._vltype_cls)
self._cmptypes = _LazyObjectLookup(self, self._cmptype_cls)
self._usertypes = dict()

# this map keeps track of all dimensions
if parent is self:
self._all_dimensions = ChainMap(self._dimensions)
self._all_enumtypes = ChainMap(self._enumtypes)
self._all_vltypes = ChainMap(self._vltypes)
self._all_cmptypes = ChainMap(self._cmptypes)
self._all_usertypes = ChainMap(self._usertypes)

else:
self._all_dimensions = parent._all_dimensions.new_child(self._dimensions)
self._all_h5groups = parent._all_h5groups.new_child(self._h5group)
self._all_enumtypes = parent._all_enumtypes.new_child(self._enumtypes)
self._all_vltypes = parent._all_vltypes.new_child(self._vltypes)
self._all_cmptypes = parent._all_cmptypes.new_child(self._cmptypes)
self._all_usertypes = parent._all_usertypes.new_child(self._usertypes)

self._variables = _LazyObjectLookup(self, self._variable_cls)
self._groups = _LazyObjectLookup(self, self._group_cls)
Expand All @@ -713,11 +756,14 @@ def __init__(self, parent, name):
# add to the groups collection if this is a h5py(d) Group
# instance
self._groups.add(k)
# todo: add other user types here
elif isinstance(
v, self._root._h5py.Datatype
) and self._root._h5py.check_enum_dtype(v.dtype):
self._enumtypes.add(k)
elif isinstance(v, self._root._h5py.Datatype):
if self._root._h5py.check_enum_dtype(v.dtype):
self._enumtypes.add(k)
elif self._root._h5py.check_vlen_dtype(v.dtype):
self._vltypes.add(k)
elif v.dtype.names is not None or "complex" in v.dtype.name:
self._cmptypes.add(k)
self._usertypes[k] = v.name
else:
if v.attrs.get("CLASS") == b"DIMENSION_SCALE":
# add dimension and retrieve size
Expand Down Expand Up @@ -1112,6 +1158,18 @@ def variables(self):
def enumtypes(self):
return Frozen(self._enumtypes)

@property
def vltypes(self):
    """Return this group's VLType lookup wrapped in ``Frozen``."""
    frozen_vltypes = Frozen(self._vltypes)
    return frozen_vltypes

@property
def cmptypes(self):
    """Return this group's CompoundType lookup wrapped in ``Frozen``."""
    frozen_cmptypes = Frozen(self._cmptypes)
    return frozen_cmptypes

@property
def usertypes(self):
    """Return this group's user type mapping wrapped in ``Frozen``.

    The mapping stores, per type name, the HDF5 path of the committed
    type (see the ``create_*type`` methods).
    """
    frozen_usertypes = Frozen(self._usertypes)
    return frozen_usertypes

@property
def dims(self):
    """Return this group's dimensions wrapped in ``Frozen``."""
    frozen_dims = Frozen(self._dimensions)
    return frozen_dims
Expand Down Expand Up @@ -1175,9 +1233,44 @@ def create_enumtype(self, datatype, datatype_name, enum_dict):
self._h5group[datatype_name] = et
# create enumtype class instance
enumtype = self._enumtype_cls(self, datatype_name)
# enumtype = self._usertype_cls(self, datatype_name)
self._enumtypes[datatype_name] = enumtype
self._usertypes[datatype_name] = enumtype._h5ds.name
return enumtype

def create_vltype(self, datatype, datatype_name):
    """Create a VLType in this group.

    Parameters
    ----------
    datatype : np.dtype
        A numpy dtype (or anything ``np.dtype`` accepts) describing the
        base type of the variable-length type.
    datatype_name : str
        Name under which the type is stored in this group.

    Returns
    -------
    The newly created ``self._vltype_cls`` instance.
    """
    # normalise the input to a numpy dtype before handing it to h5py
    base_dtype = np.dtype(datatype)
    vlen_dtype = self._root._h5py.vlen_dtype(base_dtype)
    # store the vlen dtype in the underlying h5 group under the given name
    self._h5group[datatype_name] = vlen_dtype

    # wrap the stored type and register it in both lookups
    new_type = self._vltype_cls(self, datatype_name)
    self._vltypes[datatype_name] = new_type
    self._usertypes[datatype_name] = new_type._h5ds.name
    return new_type

def create_cmptype(self, datatype, datatype_name):
    """Create a CompoundType in this group.

    Parameters
    ----------
    datatype : np.dtype
        A numpy dtype (or anything ``np.dtype`` accepts) describing the
        structured type.
    datatype_name : str
        Name under which the type is stored in this group.

    Returns
    -------
    The newly created ``self._cmptype_cls`` instance.
    """
    # normalise the input to a numpy dtype and store it in the h5 group
    struct_dtype = np.dtype(datatype)
    self._h5group[datatype_name] = struct_dtype

    # wrap the stored type and register it in both lookups
    new_type = self._cmptype_cls(self, datatype_name)
    self._cmptypes[datatype_name] = new_type
    self._usertypes[datatype_name] = new_type._h5ds.name
    return new_type


class File(Group):
def __init__(self, path, mode="r", invalid_netcdf=False, phony_dims=None, **kwargs):
Expand Down Expand Up @@ -1342,12 +1435,8 @@ def _check_valid_netcdf_dtype(self, dtype):

if dtype == bool:
description = "boolean"
elif dtype == complex:
description = "complex"
elif self._h5py.check_dtype(ref=dtype) is not None:
description = "reference"
elif self._h5py.check_dtype(vlen=dtype) not in {None, str, bytes}:
description = "non-string variable length"
else:
description = None

Expand Down
17 changes: 17 additions & 0 deletions h5netcdf/legacyapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,34 @@ class EnumType(core.EnumType):
_cls_name = "h5netcdf.legacyapi.EnumType"


class VLType(core.VLType):
    """Legacy-API variant of ``core.VLType``."""

    _cls_name = "h5netcdf.legacyapi.VLType"


class CompoundType(core.CompoundType):
    """Legacy-API variant of ``core.CompoundType``."""

    _cls_name = "h5netcdf.legacyapi.CompoundType"


class UserType(core.UserType):
    """Legacy-API variant of ``core.UserType``."""

    _cls_name = "h5netcdf.legacyapi.UserType"


class Group(core.Group, HasAttributesMixin):
_cls_name = "h5netcdf.legacyapi.Group"
_variable_cls = Variable
_enumtype_cls = EnumType
_vltype_cls = VLType
_cmptype_cls = CompoundType
_usertype_cls = UserType

@property
def _group_cls(self):
return Group

createGroup = core.Group.create_group
createEnumType = core.Group.create_enumtype
createVLType = core.Group.create_vltype
createCompoundType = core.Group.create_cmptype

def createDimension(self, name, size):
"""Creates a new dimension with given name and size.
Expand Down
Loading

0 comments on commit 6dc11d3

Please sign in to comment.