Refactor builders and check spec dtype (#452)
Co-authored-by: Andrew Tritt <ajtritt@lbl.gov>
rly and ajtritt committed Nov 16, 2020
1 parent c94ab5e commit 66b9496
Showing 9 changed files with 673 additions and 681 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
## HDMF 2.3.0 (Upcoming)

### New features
+- Drop support for Python 3.5. Add testing for Python 3.9. @ajtritt (#459)
- Add methods for automatic creation of `MultiContainerInterface` classes. @bendichter (#420, #425)
- Add ability to specify a custom class for new columns to a `DynamicTable` that are not `VectorData`,
`DynamicTableRegion`, or `VocabData` using `DynamicTable.__columns__` or `DynamicTable.add_column(...)`. @rly (#436)
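For the `add_column` route in the entry above, a minimal sketch (the `col_cls` keyword name is an assumption here, following the API added in #436; all other names are illustrative):

```python
from hdmf.common import DynamicTable, VectorData

class MyVectorData(VectorData):
    """Hypothetical VectorData subclass standing in for a custom column class."""
    pass

table = DynamicTable(name='trials', description='an example table')
# col_cls (assumed keyword per #436) makes add_column construct the new
# column with MyVectorData instead of the default VectorData.
table.add_column(name='score', description='an example column', col_cls=MyVectorData)
```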
@@ -15,12 +16,15 @@
- Add SimpleMultiContainer, a data_type for storing a Container and Data objects together. @ajtritt (#449)
- Support `pathlib.Path` paths in `HDMFIO.__init__`, `HDF5IO.__init__`, and `HDF5IO.load_namespaces`. @dsleiter (#439)
- Use hdmf-common-schema 1.2.1. See https://hdmf-common-schema.readthedocs.io/en/latest/format_release_notes.html for details.
-- Block usage of h5py 3+. h5py>=2.9, <3 is supported.
-- Block usage of numpy>=1.19.4 due to a known issue with numpy on some Windows 10 systems. numpy>1.16, <1.19.4 is supported.
+- Block usage of h5py 3+. h5py>=2.9, <3 is supported. @rly (#461)
+- Block usage of numpy>=1.19.4 due to a known issue with numpy on some Windows 10 systems. numpy>1.16, <1.19.4 is supported. @rly (#461)

### Internal improvements
- Refactor `HDF5IO.write_dataset` to be more readable. @rly (#428)
- Fix bug in slicing tables with DynamicTableRegions. @ajtritt (#449)
+- Remove unused or refactored internal builder functions `GroupBuilder.add_group`, `GroupBuilder.add_dataset`,
+  `GroupBuilder.add_link`, `GroupBuilder.set_builder`, `BaseBuilder.deep_update`, `GroupBuilder.deep_update`,
+  `DatasetBuilder.deep_update`. Make `BaseBuilder` not instantiable and refactor builder code. @rly (#452)
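For downstream code that used the removed convenience methods, the replacement pattern mirrors what `objectmapper.py` now does in the diff below; a minimal sketch with illustrative names:

```python
from hdmf.build import DatasetBuilder, GroupBuilder

group = GroupBuilder(name='my_group')

# Before this commit: sub_builder = group.add_dataset('my_data', [1, 2, 3])
# After: construct the DatasetBuilder directly, then attach it to the group.
dataset = DatasetBuilder(name='my_data', data=[1, 2, 3], parent=group)
group.set_dataset(dataset)
```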

### Bug fixes
- Fix development package dependency issues. @rly (#431)
403 changes: 142 additions & 261 deletions src/hdmf/build/builders.py

Large diffs are not rendered by default.

97 changes: 43 additions & 54 deletions src/hdmf/build/objectmapper.py
@@ -550,14 +550,12 @@ def get_attr_value(self, **kwargs):
                msg = "Container '%s' (%s) does not have attribute '%s'" % (container.name, type(container), attr_name)
                raise Exception(msg)
            if attr_val is not None:
-                attr_val = self.__convert_value(attr_val, spec)
+                attr_val = self.__convert_string(attr_val, spec)
            # else: attr_val is an attribute on the Container and its value is None
        return attr_val

-    def __convert_value(self, value, spec):
-        """
-        Convert string types to the specified dtype
-        """
+    def __convert_string(self, value, spec):
+        """Convert string types to the specified dtype."""
        ret = value
        if isinstance(spec, AttributeSpec):
            if 'text' in spec.dtype:
@@ -567,27 +565,24 @@ def __convert_value(self, value, spec):
                ret = str(value)
        elif isinstance(spec, DatasetSpec):
            # TODO: make sure we can handle specs with data_type_inc set
-            if spec.data_type_inc is not None:
-                ret = value
-            else:
-                if spec.dtype is not None:
-                    string_type = None
-                    if 'text' in spec.dtype:
-                        string_type = str
-                    elif 'ascii' in spec.dtype:
-                        string_type = bytes
-                    elif 'isodatetime' in spec.dtype:
-                        string_type = datetime.isoformat
-                    if string_type is not None:
-                        if spec.shape is not None or spec.dims is not None:
-                            ret = list(map(string_type, value))
-                        else:
-                            ret = string_type(value)
-                        # copy over any I/O parameters if they were specified
-                        if isinstance(value, DataIO):
-                            params = value.get_io_params()
-                            params['data'] = ret
-                            ret = value.__class__(**params)
+            if spec.data_type_inc is None and spec.dtype is not None:
+                string_type = None
+                if 'text' in spec.dtype:
+                    string_type = str
+                elif 'ascii' in spec.dtype:
+                    string_type = bytes
+                elif 'isodatetime' in spec.dtype:
+                    string_type = datetime.isoformat
+                if string_type is not None:
+                    if spec.shape is not None or spec.dims is not None:
+                        ret = list(map(string_type, value))
+                    else:
+                        ret = string_type(value)
+                    # copy over any I/O parameters if they were specified
+                    if isinstance(value, DataIO):
+                        params = value.get_io_params()
+                        params['data'] = ret
+                        ret = value.__class__(**params)
        return ret

    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"},
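The simplified branch above applies only when the dataset spec is untyped (`data_type_inc` is `None`) and declares a dtype. A standalone sketch of the same conversion rule, written outside the private method:

```python
from datetime import datetime

def pick_string_converter(dtype):
    """Mirror the dtype-to-converter mapping in __convert_string above."""
    if 'text' in dtype:
        return str
    if 'ascii' in dtype:
        return bytes
    if 'isodatetime' in dtype:
        return datetime.isoformat
    return None

convert = pick_string_converter('text')
print(convert(42))                    # '42' (scalar spec)
print(list(map(convert, [1, 2])))     # ['1', '2'] (spec with shape or dims)
```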
@@ -929,7 +924,8 @@ def __add_datasets(self, builder, datasets, container, build_manager, source, ex
                    raise Exception(msg) from ex
                self.logger.debug("        Adding untyped dataset for spec name %s and adding attributes"
                                  % repr(spec.name))
-                sub_builder = builder.add_dataset(spec.name, data, dtype=dtype)
+                sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype)
+                builder.set_dataset(sub_builder)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
            else:
                self.logger.debug("        Adding typed dataset for spec name: %s, %s: %s, %s: %s"
@@ -955,40 +951,33 @@ def __add_groups(self, builder, groups, container, build_manager, source, export
                self.__add_links(sub_builder, spec.links, container, build_manager, source, export)

                # handle subgroups that are not Containers
-                attr_name = self.get_attribute(spec)
-                if attr_name is not None:
-                    attr_value = self.get_attr_value(spec, container, build_manager)
-                    if any(isinstance(attr_value, t) for t in (list, tuple, set, dict)):
-                        it = iter(attr_value)
-                        if isinstance(attr_value, dict):
-                            it = iter(attr_value.values())
-                        for item in it:
-                            if isinstance(item, Container):
-                                self.__add_containers(sub_builder, spec, item, build_manager, source, container, export)
+                attr_value = self.get_attr_value(spec, container, build_manager)
+                if isinstance(attr_value, (list, tuple, set, dict)):
+                    if isinstance(attr_value, dict):
+                        attr_value = attr_value.values()
+                    for item in attr_value:
+                        if isinstance(item, Container):
+                            self.__add_containers(sub_builder, spec, item, build_manager, source, container, export)
                self.__add_groups(sub_builder, spec.groups, container, build_manager, source, export)
                empty = sub_builder.is_empty()
                if not empty or (empty and isinstance(spec.quantity, int)):
                    if sub_builder.name not in builder.groups:
                        builder.set_group(sub_builder)
-            else:
-                if spec.data_type_def is not None:
-                    self.logger.debug("    Adding group for spec name: %s, %s: %s, %s: %s"
-                                      % (repr(spec.name),
-                                         spec.def_key(), repr(spec.data_type_def),
-                                         spec.inc_key(), repr(spec.data_type_inc)))
-                    attr_name = self.get_attribute(spec)
-                    if attr_name is not None:
-                        attr_value = getattr(container, attr_name, None)
-                        if attr_value is not None:
-                            self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
-                else:  # data_type_def is None and data_type_inc is not None
-                    self.logger.debug("    Adding group for spec name: %s, %s: %s, %s: %s"
-                                      % (repr(spec.name),
-                                         spec.def_key(), repr(spec.data_type_def),
-                                         spec.inc_key(), repr(spec.data_type_inc)))
-                    attr_value = self.get_attr_value(spec, container, build_manager)
+            elif spec.data_type_def is not None:
+                self.logger.debug("    Adding group for spec name: %s, %s: %s"
+                                  % (repr(spec.name), spec.def_key(), repr(spec.data_type_def)))
+                attr_name = self.get_attribute(spec)
+                if attr_name is not None:
+                    # NOTE: can this be overridden? if so, this should use self.get_attr_value
+                    attr_value = getattr(container, attr_name, None)
+                    if attr_value is not None:
+                        self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
+            else:  # data_type_def is None and data_type_inc is not None
+                self.logger.debug("    Adding group for spec name: %s, %s: %s"
+                                  % (repr(spec.name), spec.inc_key(), repr(spec.data_type_inc)))
+                attr_value = self.get_attr_value(spec, container, build_manager)
                if attr_value is not None:
                    self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)

    def __add_containers(self, builder, spec, value, build_manager, source, parent_container, export):
        if isinstance(value, AbstractContainer):
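Two idioms carry the `__add_groups` rewrite above: `any(isinstance(x, t) for t in (...))` collapses to one `isinstance` call with a tuple of types, and the dict case reassigns `attr_value = attr_value.values()` instead of managing a separate iterator. A tiny sketch of the resulting traversal:

```python
def iter_items(attr_value):
    """Yield items from a list/tuple/set/dict the way __add_groups now iterates."""
    if isinstance(attr_value, (list, tuple, set, dict)):
        if isinstance(attr_value, dict):
            attr_value = attr_value.values()
        yield from attr_value

print(list(iter_items({'a': 1, 'b': 2})))    # [1, 2]
print(list(iter_items((3, 4))))              # [3, 4]
print(list(iter_items('not a collection')))  # [] -- strings are skipped
```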
50 changes: 23 additions & 27 deletions src/hdmf/spec/spec.py
@@ -18,7 +18,7 @@
}


-class DtypeHelper():
+class DtypeHelper:
# Dict where the keys are the primary data type and the values are list of strings with synonyms for the dtype
# make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
# hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
@@ -70,6 +70,14 @@ def simplify_cpd_type(cpd_type):
            ret.append(exp_key)
        return ret

+    @staticmethod
+    def check_dtype(dtype):
+        """Check that the dtype string is a reference or a valid primary dtype."""
+        if not isinstance(dtype, RefSpec) and dtype not in DtypeHelper.valid_primary_dtypes:
+            raise ValueError("dtype '%s' is not a valid primary data type. Allowed dtypes: %s"
+                             % (dtype, str(DtypeHelper.valid_primary_dtypes)))
+        return dtype


class ConstructableDict(dict, metaclass=ABCMeta):
    @classmethod
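Usage of the new helper, which the constructors below now delegate to (a minimal sketch; `MyType` is a placeholder type name):

```python
from hdmf.spec import RefSpec
from hdmf.spec.spec import DtypeHelper

DtypeHelper.check_dtype('text')                       # returns 'text'
DtypeHelper.check_dtype(RefSpec('MyType', 'object'))  # references pass through

try:
    DtypeHelper.check_dtype('string')  # invalid; 'text' is the valid spelling
except ValueError as e:
    print(e)
```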
@@ -226,14 +234,7 @@ def __init__(self, **kwargs):
        name, dtype, doc, dims, shape, required, parent, value, default_value = getargs(
            'name', 'dtype', 'doc', 'dims', 'shape', 'required', 'parent', 'value', 'default_value', kwargs)
        super().__init__(doc, name=name, required=required, parent=parent)
-        if isinstance(dtype, RefSpec):
-            self['dtype'] = dtype
-        else:
-            self['dtype'] = dtype
-            # Validate the dtype string
-            if self['dtype'] not in DtypeHelper.valid_primary_dtypes:
-                raise ValueError('dtype %s not a valid primary data type %s' % (self['dtype'],
-                                 str(DtypeHelper.valid_primary_dtypes)))
+        self['dtype'] = DtypeHelper.check_dtype(dtype)
        if value is not None:
            self.pop('required', None)
            self['value'] = value
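Behavior is unchanged for callers of `AttributeSpec`: an invalid dtype string still raises `ValueError` at construction, now via the shared helper. A quick sketch:

```python
from hdmf.spec import AttributeSpec

AttributeSpec(name='comments', doc='human-readable comments', dtype='text')  # ok

try:
    AttributeSpec(name='comments', doc='human-readable comments', dtype='string')
except ValueError as e:
    print(e)  # 'string' is not a valid primary data type
```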
@@ -554,7 +555,7 @@ def __init__(self, **kwargs):
        doc, name, dtype = getargs('doc', 'name', 'dtype', kwargs)
        self['doc'] = doc
        self['name'] = name
-        self.assertValidDtype(dtype)
+        self.check_valid_dtype(dtype)
        self['dtype'] = dtype

@property
Expand All @@ -574,17 +575,17 @@ def dtype(self):

    @staticmethod
    def assertValidDtype(dtype):
+        # Calls check_valid_dtype. This method is maintained for backwards compatibility
+        return DtypeSpec.check_valid_dtype(dtype)
+
+    @staticmethod
+    def check_valid_dtype(dtype):
        if isinstance(dtype, dict):
            if _target_type_key not in dtype:
                msg = "'dtype' must have the key '%s'" % _target_type_key
-                raise AssertionError(msg)
-        elif isinstance(dtype, RefSpec):
-            pass
+                raise ValueError(msg)
        else:
-            if dtype not in DtypeHelper.valid_primary_dtypes:
-                msg = "'dtype=%s' string not in valid primary data type: %s " % (str(dtype),
-                                                                                 str(DtypeHelper.valid_primary_dtypes))
-                raise AssertionError(msg)
+            DtypeHelper.check_dtype(dtype)
        return True
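Note the behavior change buried here: a malformed compound-dtype entry now raises `ValueError` rather than `AssertionError`, and `assertValidDtype` survives only as a backwards-compatible alias. A sketch:

```python
from hdmf.spec import DtypeSpec

DtypeSpec.check_valid_dtype('int32')  # True
DtypeSpec.assertValidDtype('int32')   # same check via the legacy alias

try:
    DtypeSpec.check_valid_dtype({'doc': 'a dict missing the target_type key'})
except ValueError as e:
    print(e)  # "'dtype' must have the key 'target_type'"
```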

@staticmethod
Expand Down Expand Up @@ -651,17 +652,12 @@ def __init__(self, **kwargs):
        if isinstance(dtype, list):  # Dtype is a compound data type
            for _i, col in enumerate(dtype):
                if not isinstance(col, DtypeSpec):
-                    msg = 'must use DtypeSpec if defining compound dtype - found %s at element %d' % \
-                        (type(col), _i)
+                    msg = ('must use DtypeSpec if defining compound dtype - found %s at element %d'
+                           % (type(col), _i))
                    raise ValueError(msg)
            self['dtype'] = dtype
        elif isinstance(dtype, RefSpec):  # Dtype is a reference
            self['dtype'] = dtype
-        else:  # Dtype is a string
-            self['dtype'] = dtype
-            if self['dtype'] not in DtypeHelper.valid_primary_dtypes:
-                raise ValueError('dtype %s not a valid primary data type %s' %
-                                 (self['dtype'], str(DtypeHelper.valid_primary_dtypes)))
+        else:
+            DtypeHelper.check_dtype(dtype)
+            self['dtype'] = dtype
        super().__init__(doc, **kwargs)
        if default_value is not None:
            self['default_value'] = default_value
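The same helper now backs `DatasetSpec` dtype validation as well, including compound dtypes, where every element must be a `DtypeSpec`. A sketch with illustrative names:

```python
from hdmf.spec import DatasetSpec, DtypeSpec

compound = [DtypeSpec(name='x', doc='x coordinate', dtype='float'),
            DtypeSpec(name='y', doc='y coordinate', dtype='float')]
DatasetSpec(doc='a compound-typed dataset', dtype=compound, name='points')

try:
    DatasetSpec(doc='a bad dataset spec', dtype='float128', name='oops')
except ValueError as e:
    print(e)  # 'float128' is not a valid primary dtype
```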
2 changes: 1 addition & 1 deletion test.py
@@ -13,7 +13,7 @@
import unittest
from tests.coloredtestrunner import ColoredTestRunner, ColoredTestResult

-flags = {'hdmf': 1, 'integration': 3, 'example': 4}
+flags = {'hdmf': 1, 'example': 4}

TOTAL = 0
FAILURES = 0
