Skip to content

Commit

Permalink
Support pathlib paths in HDMFIO/HDF5IO (#450)
Browse files Browse the repository at this point in the history
* Fix #439
* Ref NeurodataWithoutBorders/pynwb#1303
* Support pathlib.Path as source parameter in HDMFIO.__init__
* Support pathlib.Path as path parameter in HDF5IO.__init__
* Support pathlib.Path as path parameter in HDF5IO.load_namespaces
* Add new tests using pathlib paths
  • Loading branch information
dsleiter committed Nov 5, 2020
1 parent cc8fa35 commit a15fd20
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- Add capability to add a row to a column after IO. @bendichter (#426)
- Add method `hdmf.utils.get_docval_macro` to get a tuple of the current values for a docval_macro, e.g., 'array_data'
and 'scalar_data'. @rly (#456)
- Support `pathlib.Path` paths in `HDMFIO.__init__`, `HDF5IO.__init__`, and `HDF5IO.load_namespaces`. @dsleiter (#439)

### Internal improvements
- Refactor `HDF5IO.write_dataset` to be more readable. @rly (#428)
Expand Down
11 changes: 9 additions & 2 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import deque
import numpy as np
import os.path
from pathlib import Path
from functools import partial
from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, RegionReference, check_dtype
import logging
Expand Down Expand Up @@ -29,7 +30,7 @@

class HDF5IO(HDMFIO):

@docval({'name': 'path', 'type': str, 'doc': 'the path to the HDF5 file'},
@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file'},
{'name': 'manager', 'type': (TypeMap, BuildManager),
'doc': 'the BuildManager or a TypeMap to construct a BuildManager to use for I/O', 'default': None},
{'name': 'mode', 'type': str,
Expand All @@ -45,6 +46,9 @@ def __init__(self, **kwargs):
self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__))
path, manager, mode, comm, file_obj = popargs('path', 'manager', 'mode', 'comm', 'file', kwargs)

if isinstance(path, Path):
path = str(path)

if file_obj is not None and os.path.abspath(file_obj.filename) != os.path.abspath(path):
msg = 'You argued %s as this object\'s path, ' % path
msg += 'but supplied a file with filename: %s' % file_obj.filename
Expand Down Expand Up @@ -86,7 +90,7 @@ def _file(self):
@classmethod
@docval({'name': 'namespace_catalog', 'type': (NamespaceCatalog, TypeMap),
'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'},
{'name': 'path', 'type': str, 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None},
{'name': 'file', 'type': File, 'doc': 'a pre-existing h5py.File object', 'default': None},
returns="dict with the loaded namespaces", rtype=dict)
Expand All @@ -100,6 +104,9 @@ def load_namespaces(cls, **kwargs):
namespace_catalog, path, namespaces, file_obj = popargs('namespace_catalog', 'path', 'namespaces', 'file',
kwargs)

if isinstance(path, Path):
path = str(path)

if path is None and file_obj is None:
raise ValueError("Either the 'path' or 'file' argument must be supplied to load_namespaces.")

Expand Down
11 changes: 8 additions & 3 deletions src/hdmf/backends/io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABCMeta, abstractmethod
from pathlib import Path

from ..build import BuildManager, GroupBuilder
from ..utils import docval, getargs, popargs
Expand All @@ -8,12 +9,16 @@
class HDMFIO(metaclass=ABCMeta):
@docval({'name': 'manager', 'type': BuildManager,
'doc': 'the BuildManager to use for I/O', 'default': None},
{"name": "source", "type": str,
{"name": "source", "type": (str, Path),
"doc": "the source of container being built i.e. file path", 'default': None})
def __init__(self, **kwargs):
self.__manager = getargs('manager', kwargs)
manager, source = getargs('manager', 'source', kwargs)
if isinstance(source, Path):
source = str(source)

self.__manager = manager
self.__built = dict()
self.__source = getargs('source', kwargs)
self.__source = source
self.open()

@property
Expand Down
36 changes: 36 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import h5py
from io import BytesIO
from pathlib import Path

from hdmf.utils import docval, getargs
from hdmf.data_utils import DataChunkIterator, InvalidDataIOError
Expand Down Expand Up @@ -850,6 +851,20 @@ def test_roundtrip_empty_group(self):
read_foofile = io.read()
self.assertDictEqual({}, read_foofile.buckets['bucket1'].foos)

# Round-trip test: HDF5IO is given a pathlib.Path (instead of a str) for both the write and the read.
def test_roundtrip_pathlib_path(self):
# Wrap the test's existing path in a pathlib.Path.
pathlib_path = Path(self.path)
# Build a minimal FooFile containing one bucket with one Foo.
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foobucket = FooBucket('bucket1', [foo1])
foofile = FooFile([foobucket])

# Write using the Path object as the HDF5IO path argument.
with HDF5IO(pathlib_path, manager=self.manager, mode='w') as io:
io.write(foofile)

# Read back through the same Path object and compare the stored data with what was written.
with HDF5IO(pathlib_path, manager=self.manager, mode='r') as io:
read_foofile = io.read()
self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data,
read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist())


class TestHDF5IO(TestCase):

Expand Down Expand Up @@ -885,6 +900,11 @@ def test_set_file_mismatch(self):
with self.assertRaisesWith(ValueError, err_msg):
HDF5IO(self.path, manager=self.manager, mode='w', file=self.file_obj)

# Checks that constructing HDF5IO with a pathlib.Path yields a `source` equal to self.path.
def test_pathlib_path(self):
pathlib_path = Path(self.path)
with HDF5IO(pathlib_path, mode='w') as io:
# `source` is compared against self.path, not against the Path object that was passed in.
self.assertEqual(io.source, self.path)


class TestCacheSpec(TestCase):

Expand Down Expand Up @@ -1988,6 +2008,22 @@ def test_load_namespaces_file_path_mismatched(self):

file_obj.close()

def test_load_namespaces_with_pathlib_path(self):
"""Test that loading a namespace using a valid pathlib Path is OK and returns the correct dictionary."""

# Setup all the data we need
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foobucket = FooBucket('bucket1', [foo1])
foofile = FooFile([foobucket])

# The file is written via self.path; only the load_namespaces call below receives a Path.
with HDF5IO(self.path, manager=self.manager, mode='w') as io:
io.write(foofile)

# Pass the Path positionally as the `path` argument of the classmethod.
pathlib_path = Path(self.path)
ns_catalog = NamespaceCatalog()
d = HDF5IO.load_namespaces(ns_catalog, pathlib_path)
self.assertEqual(d, {'test_core': {}})  # test_core has no dependencies


class TestExport(TestCase):
"""Test exporting HDF5 to HDF5 using HDF5IO.export_container_to_hdf5."""
Expand Down

0 comments on commit a15fd20

Please sign in to comment.