Permalink
Browse files

Merge pull request #461 from astrofrog/io/table-hdf5

Support for HDF5 table I/O
  • Loading branch information...
astrofrog committed Nov 7, 2012
2 parents 36235b3 + 1a41fe1 commit e5cfdac237b4b05a11fee343089d5f260ecfdc81
View
@@ -20,6 +20,8 @@ see the "What's New" section of the documentation for more details.
- ``astropy.table`` I/O infrastructure for custom readers/writers
implemented. [#305]
+ - Added support for reading/writing HDF5 files [#461]
+
- New ``astropy.time`` sub-package. [#332]
- New ``astropy.units`` sub-package. This has the following effects on
@@ -98,7 +98,7 @@ def write_latex(table, filename, **kwargs):
def is_latex(origin, args, kwargs):
- return args[0].endswith('.tex')
+ return isinstance(args[0], basestring) and args[0].endswith('.tex')
io_registry.register_identifier('latex', is_latex)
@@ -125,6 +125,6 @@ def write_rdb(table, filename, **kwargs):
def is_rdb(origin, args, kwargs):
- return args[0].endswith('.rdb')
+ return isinstance(args[0], basestring) and args[0].endswith('.rdb')
io_registry.register_identifier('rdb', is_rdb)
@@ -0,0 +1,3 @@
+# Licensed under a 3-clause BSD style license - see LICENSE.rst
+
+from .pickle_helpers import *
View
@@ -0,0 +1,10 @@
+# This file connects any readers/writers defined in io.misc to the
+# astropy.table.Table class
+
+from ...table import io_registry
+
+from .hdf5 import read_table_hdf5, write_table_hdf5, is_hdf5
+
+io_registry.register_reader('hdf5', read_table_hdf5)
+io_registry.register_writer('hdf5', write_table_hdf5)
+io_registry.register_identifier('hdf5', is_hdf5)
View
@@ -0,0 +1,197 @@
+# Licensed under a 3-clause BSD style license - see LICENSE.rst
+
+from __future__ import print_function
+
+import os
+
+import numpy as np
+
+from ... import log
+
# First eight bytes of every HDF5 file. This is a bytes literal because it is
# compared against data read from a file opened in binary mode; a text literal
# would never compare equal to that data on Python 3.
HDF5_SIGNATURE = b'\x89HDF\r\n\x1a\n'
+
+__all__ = ['read_table_hdf5', 'write_table_hdf5']
+
+
def is_hdf5(origin, args, kwargs):
    """
    Identify whether the first positional argument refers to HDF5 data.

    Parameters
    ----------
    origin : object
        Not used by this identifier.
    args : sequence
        Positional arguments of the read/write call. Only ``args[0]`` is
        inspected; it may be a filename or an h5py file/group object.
    kwargs : dict
        Not used by this identifier.

    Returns
    -------
    bool
        For a filename: `True` if it is an existing regular file starting
        with the HDF5 signature, or if nothing exists at that path yet and
        the name ends with ``.hdf5`` or ``.h5``. For any other object: `True`
        if h5py can be imported and the object is an h5py file or group.
        `False` in every other case, including an empty ``args``.
    """

    if not args:
        return False

    target = args[0]

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(target, string_types):
        if os.path.isfile(target):
            with open(target, 'rb') as f:
                return f.read(len(HDF5_SIGNATURE)) == HDF5_SIGNATURE
        if os.path.exists(target):
            # Directories and other non-regular files cannot be opened and
            # checked for the signature, so they are never HDF5 files.
            return False
        # Nothing exists at this path yet (e.g. when writing): rely on the
        # file extension.
        return target.endswith(('.hdf5', '.h5'))

    try:
        import h5py
    except ImportError:
        return False

    return isinstance(target, (h5py.File, h5py.Group))
+
+
def read_table_hdf5(input, path=None):
    """
    Read a Table object from an HDF5 file

    This requires `h5py <http://alfven.org/wp/hdf5-for-python/>`_ to be
    installed.

    Parameters
    ----------
    input : str or `h5py.File` or `h5py.Group`
        If a string, the filename to read the table from. If an h5py object,
        either the file or the group object to read the table from.
    path : str
        The path from which to read the table inside the HDF5 file.
        This should be relative to the input file or group.

    Returns
    -------
    table : Table
        A table built from the dataset contents, with the dataset
        attributes copied into ``table.meta``.

    Raises
    ------
    ValueError
        If ``path`` is not given or ends with ``/``.
    Exception
        If h5py is not installed.
    IOError
        If the group or the table named by ``path`` does not exist.
    """

    # Validate the arguments first so that caller mistakes are reported
    # regardless of whether the optional dependency is available.
    if path is None:
        raise ValueError("table path should be set via the path= argument")
    if path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    # Everything before the last '/' is the group; it is empty when the
    # table sits directly in the input file or group.
    group, _, name = path.rpartition('/')

    if isinstance(input, (h5py.File, h5py.Group)):
        # The caller owns this object, so it must not be closed here.
        f, g = None, input
    else:
        f = h5py.File(input, 'r')
        g = f

    try:
        if group:
            try:
                g = g[group]
            except KeyError:
                raise IOError("Group {0} does not exist".format(group))

        # Check whether table exists
        if name not in g:
            raise IOError("Table {0} does not exist".format(path))

        # Read the table from the file
        dset = g[name]

        # Create a Table object
        from ...table import Table
        table = Table(np.array(dset))

        # Read the meta-data from the file
        for key in dset.attrs:
            table.meta[key] = dset.attrs[key]
    finally:
        # Close the file on every exit path, but only if it was opened here.
        if f is not None:
            f.close()

    return table
+
+
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://alfven.org/wp/hdf5-for-python/>`_ to be
    installed.

    Parameters
    ----------
    table : Table
        The table to write. Its data array is stored as the dataset and the
        entries of ``table.meta`` are stored as dataset attributes.
    output : str or `h5py.File` or `h5py.Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
    compression : bool
        Whether to compress the table inside the HDF5 file. The value is
        forwarded unchanged as the ``compression`` argument of
        ``create_dataset``.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.

    Raises
    ------
    ValueError
        If ``path`` is not given or ends with ``/``.
    Exception
        If h5py is not installed.
    IOError
        If the output file exists and neither ``append`` nor ``overwrite``
        is set, or if a table already exists at ``path``.
    """

    # Validate the arguments first so that caller mistakes are reported
    # regardless of whether the optional dependency is available.
    if path is None:
        raise ValueError("table path should be set via the path= argument")
    if path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Everything before the last '/' is the group; it is empty when the
    # table is written directly into the output file or group.
    group, _, name = path.rpartition('/')

    if isinstance(output, (h5py.File, h5py.Group)):
        # The caller owns this object, so it must not be closed here.
        f, parent = None, output
    else:
        if os.path.exists(output) and not append:
            if overwrite:
                os.remove(output)
            else:
                raise IOError("File exists: {0}".format(output))

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')
        parent = f

    try:
        # require_group opens the group if it exists and creates it
        # otherwise, which also works for nested paths such as 'a/b'.
        g = parent.require_group(group) if group else parent

        # Check whether table already exists
        if name in g:
            raise IOError("Table {0} already exists".format(path))

        # Write the table to the file
        dset = g.create_dataset(name, data=table._data,
                                compression=compression)

        # Write the meta-data to the file
        for key in table.meta:
            value = table.meta[key]
            if isinstance(value, string_types):
                # Use np.string_ to ensure that fixed-length attributes
                # are used.
                dset.attrs[key] = np.string_(value)
            else:
                try:
                    dset.attrs[key] = value
                except TypeError:
                    log.warning("Attribute `{0}` of type {1} cannot be "
                                "written to HDF5 files - "
                                "skipping".format(key, type(value)))
    finally:
        # Close the file on every exit path, but only if it was opened here.
        if f is not None:
            f.close()
File renamed without changes.
@@ -0,0 +1 @@
+# Licensed under a 3-clause BSD style license - see LICENSE.rst
Oops, something went wrong.

0 comments on commit e5cfdac

Please sign in to comment.