Refactor builders and check spec dtype (#452)
Co-authored-by: Andrew Tritt <ajtritt@lbl.gov>
rly and ajtritt committed Nov 16, 2020
1 parent c94ab5e commit 66b9496
Showing 9 changed files with 673 additions and 681 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
## HDMF 2.3.0 (Upcoming)

### New features
+- Drop support for Python 3.5. Add testing for Python 3.9. @ajtritt (#459)
- Add methods for automatic creation of `MultiContainerInterface` classes. @bendichter (#420, #425)
- Add ability to specify a custom class for new columns to a `DynamicTable` that are not `VectorData`,
`DynamicTableRegion`, or `VocabData` using `DynamicTable.__columns__` or `DynamicTable.add_column(...)`. @rly (#436)
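For the `add_column` route in the entry above, a minimal sketch (the `col_cls` keyword name is an assumption here, following the API added in #436; all other names are illustrative):

```python
from hdmf.common import DynamicTable, VectorData

class MyVectorData(VectorData):
    """Hypothetical VectorData subclass standing in for a custom column class."""
    pass

table = DynamicTable(name='trials', description='an example table')
# col_cls (assumed keyword per #436) makes add_column construct the new
# column with MyVectorData instead of the default VectorData.
table.add_column(name='score', description='an example column', col_cls=MyVectorData)
```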
@@ -15,12 +16,15 @@
- Add SimpleMultiContainer, a data_type for storing a Container and Data objects together. @ajtritt (#449)
- Support `pathlib.Path` paths in `HDMFIO.__init__`, `HDF5IO.__init__`, and `HDF5IO.load_namespaces`. @dsleiter (#439)
- Use hdmf-common-schema 1.2.1. See https://hdmf-common-schema.readthedocs.io/en/latest/format_release_notes.html for details.
-- Block usage of h5py 3+. h5py>=2.9, <3 is supported.
-- Block usage of numpy>=1.19.4 due to a known issue with numpy on some Windows 10 systems. numpy>1.16, <1.19.4 is supported.
+- Block usage of h5py 3+. h5py>=2.9, <3 is supported. @rly (#461)
+- Block usage of numpy>=1.19.4 due to a known issue with numpy on some Windows 10 systems. numpy>1.16, <1.19.4 is supported. @rly (#461)

### Internal improvements
- Refactor `HDF5IO.write_dataset` to be more readable. @rly (#428)
- Fix bug in slicing tables with DynamicTableRegions. @ajtritt (#449)
+- Remove unused or refactored internal builder functions `GroupBuilder.add_group`, `GroupBuilder.add_dataset`,
+  `GroupBuilder.add_link`, `GroupBuilder.set_builder`, `BaseBuilder.deep_update`, `GroupBuilder.deep_update`,
+  `DatasetBuilder.deep_update`. Make `BaseBuilder` not instantiable and refactor builder code. @rly (#452)
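For downstream code that used the removed convenience methods, the replacement pattern mirrors what `objectmapper.py` now does in the diff below; a minimal sketch with illustrative names:

```python
from hdmf.build import DatasetBuilder, GroupBuilder

group = GroupBuilder(name='my_group')

# Before this commit: sub_builder = group.add_dataset('my_data', [1, 2, 3])
# After: construct the DatasetBuilder directly, then attach it to the group.
dataset = DatasetBuilder(name='my_data', data=[1, 2, 3], parent=group)
group.set_dataset(dataset)
```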

### Bug fixes
- Fix development package dependency issues. @rly (#431)
403 changes: 142 additions & 261 deletions src/hdmf/build/builders.py

Large diffs are not rendered by default.

97 changes: 43 additions & 54 deletions src/hdmf/build/objectmapper.py
@@ -550,14 +550,12 @@ def get_attr_value(self, **kwargs):
                msg = "Container '%s' (%s) does not have attribute '%s'" % (container.name, type(container), attr_name)
                raise Exception(msg)
            if attr_val is not None:
-                attr_val = self.__convert_value(attr_val, spec)
+                attr_val = self.__convert_string(attr_val, spec)
            # else: attr_val is an attribute on the Container and its value is None
        return attr_val

-    def __convert_value(self, value, spec):
-        """
-        Convert string types to the specified dtype
-        """
+    def __convert_string(self, value, spec):
+        """Convert string types to the specified dtype."""
        ret = value
        if isinstance(spec, AttributeSpec):
            if 'text' in spec.dtype:
@@ -567,27 +565,24 @@ def __convert_value(self, value, spec):
                ret = str(value)
        elif isinstance(spec, DatasetSpec):
            # TODO: make sure we can handle specs with data_type_inc set
-            if spec.data_type_inc is not None:
-                ret = value
-            else:
-                if spec.dtype is not None:
-                    string_type = None
-                    if 'text' in spec.dtype:
-                        string_type = str
-                    elif 'ascii' in spec.dtype:
-                        string_type = bytes
-                    elif 'isodatetime' in spec.dtype:
-                        string_type = datetime.isoformat
-                    if string_type is not None:
-                        if spec.shape is not None or spec.dims is not None:
-                            ret = list(map(string_type, value))
-                        else:
-                            ret = string_type(value)
-                        # copy over any I/O parameters if they were specified
-                        if isinstance(value, DataIO):
-                            params = value.get_io_params()
-                            params['data'] = ret
-                            ret = value.__class__(**params)
+            if spec.data_type_inc is None and spec.dtype is not None:
+                string_type = None
+                if 'text' in spec.dtype:
+                    string_type = str
+                elif 'ascii' in spec.dtype:
+                    string_type = bytes
+                elif 'isodatetime' in spec.dtype:
+                    string_type = datetime.isoformat
+                if string_type is not None:
+                    if spec.shape is not None or spec.dims is not None:
+                        ret = list(map(string_type, value))
+                    else:
+                        ret = string_type(value)
+                    # copy over any I/O parameters if they were specified
+                    if isinstance(value, DataIO):
+                        params = value.get_io_params()
+                        params['data'] = ret
+                        ret = value.__class__(**params)
        return ret

    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"},
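The simplified branch above applies only when the dataset spec is untyped (`data_type_inc` is `None`) and declares a dtype. A standalone sketch of the same conversion rule, written outside the private method:

```python
from datetime import datetime

def pick_string_converter(dtype):
    """Mirror the dtype-to-converter mapping in __convert_string above."""
    if 'text' in dtype:
        return str
    if 'ascii' in dtype:
        return bytes
    if 'isodatetime' in dtype:
        return datetime.isoformat
    return None

convert = pick_string_converter('text')
print(convert(42))                    # '42' (scalar spec)
print(list(map(convert, [1, 2])))     # ['1', '2'] (spec with shape or dims)
```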
@@ -929,7 +924,8 @@ def __add_datasets(self, builder, datasets, container, build_manager, source, ex
                    raise Exception(msg) from ex
                self.logger.debug("        Adding untyped dataset for spec name %s and adding attributes"
                                  % repr(spec.name))
-                sub_builder = builder.add_dataset(spec.name, data, dtype=dtype)
+                sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype)
+                builder.set_dataset(sub_builder)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
            else:
                self.logger.debug("        Adding typed dataset for spec name: %s, %s: %s, %s: %s"
@@ -955,40 +951,33 @@ def __add_groups(self, builder, groups, container, build_manager, source, export
                self.__add_links(sub_builder, spec.links, container, build_manager, source, export)

                # handle subgroups that are not Containers
-                attr_name = self.get_attribute(spec)
-                if attr_name is not None:
-                    attr_value = self.get_attr_value(spec, container, build_manager)
-                    if any(isinstance(attr_value, t) for t in (list, tuple, set, dict)):
-                        it = iter(attr_value)
-                        if isinstance(attr_value, dict):
-                            it = iter(attr_value.values())
-                        for item in it:
-                            if isinstance(item, Container):
-                                self.__add_containers(sub_builder, spec, item, build_manager, source, container, export)
+                attr_value = self.get_attr_value(spec, container, build_manager)
+                if isinstance(attr_value, (list, tuple, set, dict)):
+                    if isinstance(attr_value, dict):
+                        attr_value = attr_value.values()
+                    for item in attr_value:
+                        if isinstance(item, Container):
+                            self.__add_containers(sub_builder, spec, item, build_manager, source, container, export)
                self.__add_groups(sub_builder, spec.groups, container, build_manager, source, export)
                empty = sub_builder.is_empty()
                if not empty or (empty and isinstance(spec.quantity, int)):
                    if sub_builder.name not in builder.groups:
                        builder.set_group(sub_builder)
-            else:
-                if spec.data_type_def is not None:
-                    self.logger.debug("    Adding group for spec name: %s, %s: %s, %s: %s"
-                                      % (repr(spec.name),
-                                         spec.def_key(), repr(spec.data_type_def),
-                                         spec.inc_key(), repr(spec.data_type_inc)))
-                    attr_name = self.get_attribute(spec)
-                    if attr_name is not None:
-                        attr_value = getattr(container, attr_name, None)
-                        if attr_value is not None:
-                            self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
-                else:  # data_type_def is None and data_type_inc is not None
-                    self.logger.debug("    Adding group for spec name: %s, %s: %s, %s: %s"
-                                      % (repr(spec.name),
-                                         spec.def_key(), repr(spec.data_type_def),
-                                         spec.inc_key(), repr(spec.data_type_inc)))
-                    attr_value = self.get_attr_value(spec, container, build_manager)
+            elif spec.data_type_def is not None:
+                self.logger.debug("    Adding group for spec name: %s, %s: %s"
+                                  % (repr(spec.name), spec.def_key(), repr(spec.data_type_def)))
+                attr_name = self.get_attribute(spec)
+                if attr_name is not None:
+                    # NOTE: can this be overridden? if so, this should use self.get_attr_value
+                    attr_value = getattr(container, attr_name, None)
+                    if attr_value is not None:
+                        self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
+            else:  # data_type_def is None and data_type_inc is not None
+                self.logger.debug("    Adding group for spec name: %s, %s: %s"
+                                  % (repr(spec.name), spec.inc_key(), repr(spec.data_type_inc)))
+                attr_value = self.get_attr_value(spec, container, build_manager)
                if attr_value is not None:
                    self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)

    def __add_containers(self, builder, spec, value, build_manager, source, parent_container, export):
        if isinstance(value, AbstractContainer):
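Two idioms carry the `__add_groups` rewrite above: `any(isinstance(x, t) for t in (...))` collapses to one `isinstance` call with a tuple of types, and the dict case reassigns `attr_value = attr_value.values()` instead of managing a separate iterator. A tiny sketch of the resulting traversal:

```python
def iter_items(attr_value):
    """Yield items from a list/tuple/set/dict the way __add_groups now iterates."""
    if isinstance(attr_value, (list, tuple, set, dict)):
        if isinstance(attr_value, dict):
            attr_value = attr_value.values()
        yield from attr_value

print(list(iter_items({'a': 1, 'b': 2})))    # [1, 2]
print(list(iter_items((3, 4))))              # [3, 4]
print(list(iter_items('not a collection')))  # [] -- strings are skipped
```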
50 changes: 23 additions & 27 deletions src/hdmf/spec/spec.py
@@ -18,7 +18,7 @@
}


-class DtypeHelper():
+class DtypeHelper:
# Dict where the keys are the primary data type and the values are list of strings with synonyms for the dtype
# make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
# hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
@@ -70,6 +70,14 @@ def simplify_cpd_type(cpd_type):
            ret.append(exp_key)
        return ret

+    @staticmethod
+    def check_dtype(dtype):
+        """Check that the dtype string is a reference or a valid primary dtype."""
+        if not isinstance(dtype, RefSpec) and dtype not in DtypeHelper.valid_primary_dtypes:
+            raise ValueError("dtype '%s' is not a valid primary data type. Allowed dtypes: %s"
+                             % (dtype, str(DtypeHelper.valid_primary_dtypes)))
+        return dtype


class ConstructableDict(dict, metaclass=ABCMeta):
    @classmethod
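Usage of the new helper, which the constructors below now delegate to (a minimal sketch; `MyType` is a placeholder type name):

```python
from hdmf.spec import RefSpec
from hdmf.spec.spec import DtypeHelper

DtypeHelper.check_dtype('text')                       # returns 'text'
DtypeHelper.check_dtype(RefSpec('MyType', 'object'))  # references pass through

try:
    DtypeHelper.check_dtype('string')  # invalid; 'text' is the valid spelling
except ValueError as e:
    print(e)
```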
@@ -226,14 +234,7 @@ def __init__(self, **kwargs):
        name, dtype, doc, dims, shape, required, parent, value, default_value = getargs(
            'name', 'dtype', 'doc', 'dims', 'shape', 'required', 'parent', 'value', 'default_value', kwargs)
        super().__init__(doc, name=name, required=required, parent=parent)
-        if isinstance(dtype, RefSpec):
-            self['dtype'] = dtype
-        else:
-            self['dtype'] = dtype
-            # Validate the dtype string
-            if self['dtype'] not in DtypeHelper.valid_primary_dtypes:
-                raise ValueError('dtype %s not a valid primary data type %s' % (self['dtype'],
-                                 str(DtypeHelper.valid_primary_dtypes)))
+        self['dtype'] = DtypeHelper.check_dtype(dtype)
        if value is not None:
            self.pop('required', None)
            self['value'] = value
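Behavior is unchanged for callers of `AttributeSpec`: an invalid dtype string still raises `ValueError` at construction, now via the shared helper. A quick sketch:

```python
from hdmf.spec import AttributeSpec

AttributeSpec(name='comments', doc='human-readable comments', dtype='text')  # ok

try:
    AttributeSpec(name='comments', doc='human-readable comments', dtype='string')
except ValueError as e:
    print(e)  # 'string' is not a valid primary data type
```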
@@ -554,7 +555,7 @@ def __init__(self, **kwargs):
        doc, name, dtype = getargs('doc', 'name', 'dtype', kwargs)
        self['doc'] = doc
        self['name'] = name
-        self.assertValidDtype(dtype)
+        self.check_valid_dtype(dtype)
        self['dtype'] = dtype

@property
Expand All @@ -574,17 +575,17 @@ def dtype(self):

    @staticmethod
    def assertValidDtype(dtype):
+        # Calls check_valid_dtype. This method is maintained for backwards compatibility
+        return DtypeSpec.check_valid_dtype(dtype)
+
+    @staticmethod
+    def check_valid_dtype(dtype):
        if isinstance(dtype, dict):
            if _target_type_key not in dtype:
                msg = "'dtype' must have the key '%s'" % _target_type_key
-                raise AssertionError(msg)
-        elif isinstance(dtype, RefSpec):
-            pass
+                raise ValueError(msg)
        else:
-            if dtype not in DtypeHelper.valid_primary_dtypes:
-                msg = "'dtype=%s' string not in valid primary data type: %s " % (str(dtype),
-                                                                                 str(DtypeHelper.valid_primary_dtypes))
-                raise AssertionError(msg)
+            DtypeHelper.check_dtype(dtype)
        return True
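Note the behavior change buried here: a malformed compound-dtype entry now raises `ValueError` rather than `AssertionError`, and `assertValidDtype` survives only as a backwards-compatible alias. A sketch:

```python
from hdmf.spec import DtypeSpec

DtypeSpec.check_valid_dtype('int32')  # True
DtypeSpec.assertValidDtype('int32')   # same check via the legacy alias

try:
    DtypeSpec.check_valid_dtype({'doc': 'a dict missing the target_type key'})
except ValueError as e:
    print(e)  # "'dtype' must have the key 'target_type'"
```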

@staticmethod
Expand Down Expand Up @@ -651,17 +652,12 @@ def __init__(self, **kwargs):
        if isinstance(dtype, list):  # Dtype is a compound data type
            for _i, col in enumerate(dtype):
                if not isinstance(col, DtypeSpec):
-                    msg = 'must use DtypeSpec if defining compound dtype - found %s at element %d' % \
-                        (type(col), _i)
+                    msg = ('must use DtypeSpec if defining compound dtype - found %s at element %d'
+                           % (type(col), _i))
                    raise ValueError(msg)
            self['dtype'] = dtype
        elif isinstance(dtype, RefSpec):  # Dtype is a reference
            self['dtype'] = dtype
-        else:  # Dtype is a string
-            self['dtype'] = dtype
-            if self['dtype'] not in DtypeHelper.valid_primary_dtypes:
-                raise ValueError('dtype %s not a valid primary data type %s' %
-                                 (self['dtype'], str(DtypeHelper.valid_primary_dtypes)))
+        else:
+            DtypeHelper.check_dtype(dtype)
+            self['dtype'] = dtype
        super().__init__(doc, **kwargs)
        if default_value is not None:
            self['default_value'] = default_value
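The same helper now backs `DatasetSpec` dtype validation as well, including compound dtypes, where every element must be a `DtypeSpec`. A sketch with illustrative names:

```python
from hdmf.spec import DatasetSpec, DtypeSpec

compound = [DtypeSpec(name='x', doc='x coordinate', dtype='float'),
            DtypeSpec(name='y', doc='y coordinate', dtype='float')]
DatasetSpec(doc='a compound-typed dataset', dtype=compound, name='points')

try:
    DatasetSpec(doc='a bad dataset spec', dtype='float128', name='oops')
except ValueError as e:
    print(e)  # 'float128' is not a valid primary dtype
```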
2 changes: 1 addition & 1 deletion test.py
@@ -13,7 +13,7 @@
import unittest
from tests.coloredtestrunner import ColoredTestRunner, ColoredTestResult

-flags = {'hdmf': 1, 'integration': 3, 'example': 4}
+flags = {'hdmf': 1, 'example': 4}

TOTAL = 0
FAILURES = 0
