Skip to content

Commit

Permalink
Support spec dtypes "bytes" and "short", improve test coverage (#456)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly committed Nov 11, 2020
1 parent b2509e6 commit 7297460
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 119 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- Fix convert dtype when writing numpy array from `h5py.Dataset`. @rly (#427)
- Fix inheritance when non-`AbstractContainer` is base class. @rly (#444)
- Fix use of `hdmf.testing.assertContainerEqual(...)` for `Data` objects. @rly (#445)
- Add missing support for data conversion against spec dtypes "bytes" and "short". @rly (#456)

## HDMF 2.2.0 (August 14, 2020)

Expand Down
15 changes: 9 additions & 6 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,26 +787,29 @@ def get_type(cls, data):
"float64": np.float64,
"long": np.int64,
"int64": np.int64,
"uint64": np.uint64,
"int": np.int32,
"int32": np.int32,
"short": np.int16,
"int16": np.int16,
"int8": np.int8,
"uint64": np.uint64,
"uint": np.uint32,
"uint32": np.uint32,
"uint16": np.uint16,
"uint8": np.uint8,
"bool": np.bool_,
"text": H5_TEXT,
"utf": H5_TEXT,
"utf8": H5_TEXT,
"utf-8": H5_TEXT,
"ascii": H5_BINARY,
"str": H5_BINARY,
"isodatetime": H5_TEXT,
"uint32": np.uint32,
"uint16": np.uint16,
"uint8": np.uint8,
"bytes": H5_BINARY,
"ref": H5_REF,
"reference": H5_REF,
"object": H5_REF,
"region": H5_REGREF,
"isodatetime": H5_TEXT,
"datetime": H5_TEXT,
}

@classmethod
Expand Down
1 change: 0 additions & 1 deletion src/hdmf/build/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ def load_namespaces(self, **kwargs):
# passing an np.int16 would raise a docval error.
# passing an int64 to __init__ would result in the field storing the value as an int64 (and subsequently written
# as an int64). no upconversion or downconversion happens as a result of this map
# see https://schema-language.readthedocs.io/en/latest/specification_language_description.html#dtype
_spec_dtype_map = {
'float32': (float, np.float32, np.float64),
'float': (float, np.float32, np.float64),
Expand Down
31 changes: 18 additions & 13 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,31 +89,36 @@ class ObjectMapper(metaclass=ExtenderMeta):
'''

# mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes
# make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
# hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
# hdmf.validate.validator.__allowable, and backend dtype maps
__dtypes = {
"float": np.float32,
"float32": np.float32,
"double": np.float64,
"float64": np.float64,
"long": np.int64,
"int64": np.int64,
"uint64": np.uint64,
"int": np.int32,
"int32": np.int32,
"short": np.int16,
"int16": np.int16,
"int8": np.int8,
"uint": np.uint32,
"uint64": np.uint64,
"uint32": np.uint32,
"uint16": np.uint16,
"uint8": np.uint8,
"bool": np.bool_,
"text": _unicode,
"text": _unicode,
"utf": _unicode,
"utf8": _unicode,
"utf-8": _unicode,
"ascii": _ascii,
"str": _ascii,
"bytes": _ascii,
"isodatetime": _ascii,
"uint32": np.uint32,
"uint16": np.uint16,
"uint8": np.uint8,
"uint": np.uint32
"datetime": _ascii,
}

__no_convert = set()
Expand Down Expand Up @@ -211,10 +216,10 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901
ret_dtype = ret.dtype.type
elif isinstance(value, (tuple, list)):
if len(value) == 0:
if spec_dtype_type == _ascii:
ret_dtype = 'ascii'
elif spec_dtype_type == _unicode:
if spec_dtype_type is _unicode:
ret_dtype = 'utf8'
elif spec_dtype_type is _ascii:
ret_dtype = 'ascii'
else:
ret_dtype = spec_dtype_type
return value, ret_dtype
Expand All @@ -235,7 +240,7 @@ def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901
else:
if spec_dtype_type in (_unicode, _ascii):
ret_dtype = 'ascii'
if spec_dtype_type == _unicode:
if spec_dtype_type is _unicode:
ret_dtype = 'utf8'
ret = spec_dtype_type(value)
else:
Expand Down Expand Up @@ -286,7 +291,7 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901
return value, ret_dtype
if isinstance(value, (list, tuple)):
if len(value) == 0:
msg = "cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype."
msg = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype."
raise ValueError(msg)
return value, cls.__check_edgecases(spec, value[0], spec_dtype)[1] # infer dtype from first element
ret_dtype = type(value)
Expand All @@ -302,7 +307,7 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901
msg = "got RefSpec for value of type %s" % type(value)
raise ValueError(msg)
return value, spec_dtype
if spec_dtype is not None and spec_dtype not in cls.__dtypes:
if spec_dtype is not None and spec_dtype not in cls.__dtypes: # pragma: no cover
msg = "unrecognized dtype: %s -- cannot convert value" % spec_dtype
raise ValueError(msg)
return None, None
Expand Down
42 changes: 22 additions & 20 deletions src/hdmf/spec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,29 @@

class DtypeHelper():
# Dict where the keys are the primary data types and the values are lists of strings with synonyms for the dtype
# this is also used in the validator
# if this list is updated, also update hdmf.build.manager.TypeMap._spec_dtype_map
# make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
# hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
# hdmf.validate.validator.__allowable, and backend dtype maps
# see https://hdmf-schema-language.readthedocs.io/en/latest/specification_language_description.html#dtype
primary_dtype_synonyms = {
'float': ["float", "float32"],
'double': ["double", "float64"],
'short': ["int16", "short"],
'int': ["int32", "int"],
'long': ["int64", "long"],
'utf': ["text", "utf", "utf8", "utf-8"],
'ascii': ["ascii", "bytes"],
'bool': ["bool"],
'int8': ["int8"],
'uint8': ["uint8"],
'uint16': ["uint16"],
'uint32': ["uint32", "uint"],
'uint64': ["uint64"],
'object': ['object'],
'region': ['region'],
'numeric': ['numeric'],
'isodatetime': ["isodatetime", "datetime"]
}
'float': ["float", "float32"],
'double': ["double", "float64"],
'short': ["int16", "short"],
'int': ["int32", "int"],
'long': ["int64", "long"],
'utf': ["text", "utf", "utf8", "utf-8"],
'ascii': ["ascii", "bytes"],
'bool': ["bool"],
'int8': ["int8"],
'uint8': ["uint8"],
'uint16': ["uint16"],
'uint32': ["uint32", "uint"],
'uint64': ["uint64"],
'object': ['object'],
'region': ['region'],
'numeric': ['numeric'],
'isodatetime': ["isodatetime", "datetime"]
}

# List of recommended primary dtype strings. These are the keys of primary_dtype_synonyms
recommended_primary_dtypes = list(primary_dtype_synonyms.keys())
Expand Down

0 comments on commit 7297460

Please sign in to comment.