Skip to content

Commit

Permalink
Do not auto-read 1-element h5py.Dataset with np.void dtype (#438)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly committed Oct 13, 2020
1 parent 31a0fd1 commit 0302e16
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
### New features
- Add ability to specify a custom class (other than `VectorData`, `DynamicTableRegion`, or `VocabData`) for new
columns of a `DynamicTable` using `DynamicTable.__columns__` or `DynamicTable.add_column(...)`. @rly (#436)
- Add capability to add a row to a column after IO. @bendichter (#426)

### Bug fixes
- Fix handling of empty lists against a spec with text/bytes dtype. @rly (#434)
- Fix handling of 1-element datasets with compound dtype against a scalar spec with text/bytes dtype. @rly (#438)

## HDMF 2.2.0 (August 14, 2020)

Expand Down
5 changes: 3 additions & 2 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,8 +1043,9 @@ def __get_subspec_values(self, builder, spec, manager):
elif isinstance(spec, DatasetSpec):
if not isinstance(builder, DatasetBuilder):
raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec")
if spec.shape is None and getattr(builder.data, 'shape', None) == (1, ):
# if a scalar dataset is expected and a 1-element dataset is given, then read the dataset
if (spec.shape is None and getattr(builder.data, 'shape', None) == (1, ) and
type(builder.data[0]) != np.void):
# if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset
builder['data'] = builder.data[0] # use dictionary reference instead of .data to bypass error
ret[spec] = self.__check_ref_resolver(builder.data)
return ret
Expand Down
126 changes: 123 additions & 3 deletions tests/unit/build_tests/test_io_map_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from hdmf.spec import AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog
from hdmf.spec import AttributeSpec, DatasetSpec, DtypeSpec, SpecCatalog, SpecNamespace, NamespaceCatalog
from hdmf.build import DatasetBuilder, ObjectMapper, BuildManager, TypeMap
from hdmf import Data
from hdmf.utils import docval, getargs
from hdmf.utils import docval, getargs, call_docval_func
from hdmf.testing import TestCase

import h5py
Expand Down Expand Up @@ -44,7 +44,7 @@ def setUp(self):
self.mapper = ObjectMapper(self.baz_spec)

def setUpBazSpec(self):
self.baz_spec = DatasetSpec('an Baz type', 'int', name='MyBaz', data_type_def='Baz',
self.baz_spec = DatasetSpec('an Baz type', 'int', name='MyBaz', data_type_def='Baz', shape=[None],
attributes=[AttributeSpec('baz_attr', 'an example string attribute', 'text')])

def test_build(self):
Expand All @@ -69,3 +69,123 @@ def test_extend(self):
container.extend([4, 5])
np.testing.assert_array_equal(container[:], [1, 2, 3, 4, 5])
os.remove('test.h5')


class BazScalar(Data):
    """Data subclass used by the tests; its docval declares ``data`` as a single ``int``."""

    @docval(
        {'name': 'name', 'type': str, 'doc': 'the name of this BazScalar'},
        {'name': 'data', 'type': int, 'doc': 'some data'},
    )
    def __init__(self, **kwargs):
        # Hand the keyword arguments to the parent constructor through call_docval_func.
        call_docval_func(super().__init__, kwargs)


class TestDataMapScalar(TestCase):
    """Test constructing a BazScalar from a 1-element h5py.Dataset against a scalar spec."""

    def setUp(self):
        """Register the BazScalar spec and container type, then build the manager and mapper."""
        self.setUpBazSpec()
        self.spec_catalog = SpecCatalog()
        self.spec_catalog.register_spec(self.baz_spec, 'test.yaml')
        self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}],
                                       version='0.1.0',
                                       catalog=self.spec_catalog)
        self.namespace_catalog = NamespaceCatalog()
        self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace)
        self.type_map = TypeMap(self.namespace_catalog)
        self.type_map.register_container_type(CORE_NAMESPACE, 'BazScalar', BazScalar)
        self.type_map.register_map(BazScalar, ObjectMapper)
        self.manager = BuildManager(self.type_map)
        self.mapper = ObjectMapper(self.baz_spec)

    def tearDown(self):
        """Remove the scratch HDF5 file, including when a test fails before finishing."""
        if os.path.exists('test.h5'):
            os.remove('test.h5')

    def setUpBazSpec(self):
        """Create the dataset spec for BazScalar; no shape is given, so the spec is scalar."""
        self.baz_spec = DatasetSpec('a BazScalar type', 'int', name='MyBaz', data_type_def='BazScalar')

    def test_construct_scalar_dataset(self):
        """Test constructing a Data object with an h5py.Dataset with shape (1, ) for scalar spec."""
        # Everything that touches the dataset stays inside the `with` so the file is still open.
        with h5py.File('test.h5', 'w') as file:
            test_ds = file.create_dataset('test_ds', data=[1])
            expected = BazScalar(
                name='my_baz',
                data=1,
            )
            builder = DatasetBuilder(
                name='my_baz',
                data=test_ds,
                attributes={'data_type': 'BazScalar',
                            'namespace': CORE_NAMESPACE,
                            'object_id': expected.object_id},
            )
            container = self.mapper.construct(builder, self.manager)
            self.assertTrue(np.issubdtype(type(container.data), np.integer))  # as opposed to h5py.Dataset
            self.assertContainerEqual(container, expected)


class BazScalarCompound(Data):
    """Data subclass used by the tests; its docval declares ``data`` as ``'array_data'``."""

    # The doc for `name` previously said "BazScalar", copied from the sibling class.
    @docval({'name': 'name', 'type': str, 'doc': 'the name of this BazScalarCompound'},
            {'name': 'data', 'type': 'array_data', 'doc': 'some data'})
    def __init__(self, **kwargs):
        # Hand the keyword arguments to the parent constructor through call_docval_func.
        call_docval_func(super().__init__, kwargs)


class TestDataMapScalarCompound(TestCase):
    """Test constructing a BazScalarCompound from a 1-element compound h5py.Dataset."""

    def setUp(self):
        """Register the BazScalarCompound spec and container type, then build the manager and mapper."""
        self.setUpBazSpec()
        self.spec_catalog = SpecCatalog()
        self.spec_catalog.register_spec(self.baz_spec, 'test.yaml')
        self.namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}],
                                       version='0.1.0',
                                       catalog=self.spec_catalog)
        self.namespace_catalog = NamespaceCatalog()
        self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace)
        self.type_map = TypeMap(self.namespace_catalog)
        self.type_map.register_container_type(CORE_NAMESPACE, 'BazScalarCompound', BazScalarCompound)
        self.type_map.register_map(BazScalarCompound, ObjectMapper)
        self.manager = BuildManager(self.type_map)
        self.mapper = ObjectMapper(self.baz_spec)

    def tearDown(self):
        """Remove the scratch HDF5 file, including when a test fails before finishing."""
        if os.path.exists('test.h5'):
            os.remove('test.h5')

    def setUpBazSpec(self):
        """Create a compound-dtype dataset spec (uint64 ``id`` and text ``attr1``) with no shape."""
        self.baz_spec = DatasetSpec(
            doc='a BazScalarCompound type',
            dtype=[
                DtypeSpec(
                    name='id',
                    dtype='uint64',
                    doc='The unique identifier in this table.'
                ),
                DtypeSpec(
                    name='attr1',
                    dtype='text',
                    doc='A text attribute.'
                ),
            ],
            name='MyBaz',
            data_type_def='BazScalarCompound',
        )

    def test_construct_scalar_compound_dataset(self):
        """Test construct on a compound h5py.Dataset with shape (1, ) for scalar spec does not resolve the data."""
        # Everything that touches the dataset stays inside the `with` so the file is still open.
        with h5py.File('test.h5', 'w') as file:
            comp_type = np.dtype([('id', np.uint64), ('attr1', h5py.special_dtype(vlen=str))])
            test_ds = file.create_dataset(
                name='test_ds',
                data=np.array((1, 'text'), dtype=comp_type),
                shape=(1, ),
                dtype=comp_type
            )
            expected = BazScalarCompound(
                name='my_baz',
                data=(1, 'text'),
            )
            builder = DatasetBuilder(
                name='my_baz',
                data=test_ds,
                attributes={'data_type': 'BazScalarCompound',
                            'namespace': CORE_NAMESPACE,
                            'object_id': expected.object_id},
            )
            container = self.mapper.construct(builder, self.manager)
            # The container should still hold the h5py.Dataset rather than an extracted element.
            self.assertEqual(type(container.data), h5py.Dataset)
            self.assertContainerEqual(container, expected)

0 comments on commit 0302e16

Please sign in to comment.