Skip to content

Commit

Permalink
Merge pull request #642 from duncanmmacleod/hdf5-T1800014
Browse files Browse the repository at this point in the history
Improved HDF5 writing to follow T1800014 proposals
  • Loading branch information
Duncan Macleod committed Jan 18, 2018
2 parents 88ae495 + 51bcd67 commit 46e10d8
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 101 deletions.
12 changes: 11 additions & 1 deletion .codeclimate.yml
@@ -1,4 +1,11 @@
engines:
version: "2"

checks:
file-lines:
config:
threshold: 500

plugins:
duplication:
enabled: true
config:
Expand All @@ -14,6 +21,9 @@ engines:
enabled: true
radon:
enabled: true
config:
threshold: "C"

ratings:
paths:
- "**.py"
Expand Down
57 changes: 25 additions & 32 deletions gwpy/io/hdf5.py
Expand Up @@ -131,46 +131,39 @@ def decorated_func(obj, fobj, *args, **kwargs):
return decorated_func


def create_dataset(parent, path, overwrite=False, **kwargs):
    """Create a new dataset inside the parent HDF5 object

    Parameters
    ----------
    parent : `h5py.Group`, `h5py.File`
        the object in which to create a new dataset

    path : `str`
        the path at which to create the new dataset

    overwrite : `bool`
        if `True`, delete any existing dataset at the desired path,
        default: `False`

    **kwargs
        other arguments are passed directly to
        :meth:`h5py.Group.create_dataset`

    Returns
    -------
    dataset : `h5py.Dataset`
        the newly created dataset

    Raises
    ------
    RuntimeError
        if a dataset already exists at ``path`` and ``overwrite=False``
    """
    # force deletion of existing dataset
    if path in parent and overwrite:
        del parent[path]

    # create new dataset; on a name collision annotate the error with a
    # hint that overwrite=True will replace the existing dataset
    try:
        return parent.create_dataset(path, **kwargs)
    except RuntimeError as exc:
        if str(exc) == 'Unable to create link (Name already exists)':
            exc.args = ('{0}: {1!r}, pass overwrite=True '
                        'to ignore existing datasets'.format(str(exc), path),)
        raise
9 changes: 4 additions & 5 deletions gwpy/spectrogram/io/__init__.py
Expand Up @@ -19,9 +19,8 @@
"""Input/Output routines for the Spectrogram.
"""

__author__ = "Duncan Macleod <duncan.macleod@ligo.org>"
from . import (
hdf5
)

try:
from . import hdf5
except ImportError:
pass
__author__ = "Duncan Macleod <duncan.macleod@ligo.org>"
18 changes: 13 additions & 5 deletions gwpy/timeseries/io/hdf5.py
Expand Up @@ -19,12 +19,16 @@
"""This module attaches the HDF5 input output methods to the TimeSeries.
"""

from astropy import units

from ...io import registry as io_registry
from ...io.hdf5 import (identify_hdf5, with_read_hdf5, with_write_hdf5)
from ...types.io.hdf5 import (read_hdf5_array, write_hdf5_array)
from ...types.io.hdf5 import (read_hdf5_array, write_hdf5_series)
from .. import (TimeSeries, TimeSeriesDict,
StateVector, StateVectorDict)

SEC_UNIT = units.second

__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'


Expand All @@ -42,6 +46,12 @@ def read_hdf5_timeseries(h5f, path=None, start=None, end=None, **kwargs):
return series


def _is_timeseries_dataset(dataset):
    """Return `True` if ``dataset`` appears to hold `TimeSeries` data

    A dataset is treated as time-series-like when its ``xunit``
    attribute names a unit convertible to seconds; datasets with no
    ``xunit`` attribute are rejected.
    """
    xunit = dataset.attrs.get('xunit', 'undef')
    return SEC_UNIT.is_equivalent(xunit)


@with_read_hdf5
def read_hdf5_dict(h5f, names=None, group=None, **kwargs):
"""Read a `TimeSeriesDict` from HDF5
Expand All @@ -54,9 +64,7 @@ def read_hdf5_dict(h5f, names=None, group=None, **kwargs):

# find list of names to read
if names is None:
# TODO: improve the TimeSeries -> HDF5 format to make detecting
# a TimeSeries easier
names = [key for key in h5g if 'dx' in h5g[key]]
names = [key for key in h5g if _is_timeseries_dataset(h5g[key])]

# read names
out = kwargs.pop('dict_type', TimeSeriesDict)()
Expand Down Expand Up @@ -108,7 +116,7 @@ def write_hdf5_dict(tsdict, h5f, group=None, **kwargs):
# register read/write/identify routines for each series class;
# note: exactly one writer per (format, class) pair — registering a
# second writer for 'hdf5' would raise in the astropy io registry
for series_class in (TimeSeries, StateVector):
    reader = read_hdf5_factory(series_class)
    io_registry.register_reader('hdf5', series_class, reader)
    io_registry.register_writer('hdf5', series_class, write_hdf5_series)
    io_registry.register_identifier('hdf5', series_class, identify_hdf5)

# dict classes
Expand Down

0 comments on commit 46e10d8

Please sign in to comment.