add capability to add a row to a column after IO (#426)
* add capability to add a row to a column that is using DataIO
* add capability to add a row to a column that is an h5py.Dataset
bendichter committed Oct 12, 2020
1 parent 023353d commit abc291c
Showing 4 changed files with 114 additions and 44 deletions.
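In practical terms, this change lets a DynamicTable that was already written to disk be re-opened and grown row by row. A minimal sketch of that workflow, modeled on the test_append test added below ('table.h5' and the foo/bar column names are illustrative, not part of this commit):

import numpy as np
from hdmf.common import get_manager
from hdmf.backends.hdf5 import HDF5IO

# Re-open a previously written file in append mode; after read(), the
# table's columns are h5py.Dataset objects rather than in-memory lists.
with HDF5IO('table.h5', manager=get_manager(), mode='a') as io:
    table = io.read()
    # add_row now works even though the column data lives on disk,
    # provided the datasets were written as resizable (see maxshape below).
    table.add_row(foo=np.arange(30).reshape(5, 2, 3),
                  bar=np.arange(30).reshape(5, 2, 3))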
2 changes: 1 addition & 1 deletion src/hdmf/common/table.py
@@ -91,7 +91,7 @@ def add_vector(self, arg):
     def __check_precision(self, idx):
         """
         Check precision of current dataset and, if
-        necessary, adjust precision to accomodate new value.
+        necessary, adjust precision to accommodate new value.

         Returns:
             unsigned integer encoding of idx
29 changes: 3 additions & 26 deletions src/hdmf/container.py
@@ -3,9 +3,8 @@
 from uuid import uuid4
 from .utils import (docval, get_docval, call_docval_func, getargs, ExtenderMeta, get_data_shape, fmt_docval_args,
                     popargs, LabelledDict)
-from .data_utils import DataIO
+from .data_utils import DataIO, append_data, extend_data
 from warnings import warn
-import h5py
 import types


@@ -477,32 +476,10 @@ def get(self, args):
         return self.data[args]

     def append(self, arg):
-        if isinstance(self.data, list):
-            self.data.append(arg)
-        elif isinstance(self.data, np.ndarray):
-            self.__data = np.append(self.__data, [arg])
-        elif isinstance(self.data, h5py.Dataset):
-            shape = list(self.__data.shape)
-            shape[0] += 1
-            self.__data.resize(shape)
-            self.__data[-1] = arg
-        else:
-            msg = "Data cannot append to object of type '%s'" % type(self.__data)
-            raise ValueError(msg)
+        self.__data = append_data(self.__data, arg)

     def extend(self, arg):
-        if isinstance(self.data, list):
-            self.data.extend(arg)
-        elif isinstance(self.data, np.ndarray):
-            self.__data = np.append(self.__data, [arg])
-        elif isinstance(self.data, h5py.Dataset):
-            shape = list(self.__data.shape)
-            shape[0] += len(arg)
-            self.__data.resize(shape)
-            self.__data[-len(arg):] = arg
-        else:
-            msg = "Data cannot extend object of type '%s'" % type(self.__data)
-            raise ValueError(msg)
+        self.__data = extend_data(self.__data, arg)


 class DataRegion(Data):
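With the type dispatch moved into shared helpers, Data.append and Data.extend behave the same whether a column is backed by a list, a NumPy array, or an h5py.Dataset. A small in-memory sketch (the column name and values are illustrative):

from hdmf.common import VectorData

# VectorData inherits append/extend from Data, which now delegate to
# append_data/extend_data in hdmf.data_utils.
col = VectorData(name='foo', description='an example column', data=[1, 2, 3])
col.append(4)
col.extend([5, 6])
print(col.data)  # [1, 2, 3, 4, 5, 6]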
41 changes: 41 additions & 0 deletions src/hdmf/data_utils.py
@@ -3,10 +3,45 @@
 import numpy as np
 from warnings import warn
 import copy
+import h5py

 from .utils import docval, getargs, popargs, docval_macro, get_data_shape


+def append_data(data, arg):
+    if isinstance(data, (list, DataIO)):
+        data.append(arg)
+        return data
+    elif isinstance(data, np.ndarray):
+        return np.append(data, [arg])
+    elif isinstance(data, h5py.Dataset):
+        shape = list(data.shape)
+        shape[0] += 1
+        data.resize(shape)
+        data[-1] = arg
+        return data
+    else:
+        msg = "Data cannot append to object of type '%s'" % type(data)
+        raise ValueError(msg)
+
+
+def extend_data(data, arg):
+    if isinstance(data, (list, DataIO)):
+        data.extend(arg)
+        return data
+    elif isinstance(data, np.ndarray):
+        return np.vstack((data, arg))
+    elif isinstance(data, h5py.Dataset):
+        shape = list(data.shape)
+        shape[0] += len(arg)
+        data.resize(shape)
+        data[-len(arg):] = arg
+        return data
+    else:
+        msg = "Data cannot extend object of type '%s'" % type(data)
+        raise ValueError(msg)
+
+
 @docval_macro('array_data')
 class AbstractDataChunkIterator(metaclass=ABCMeta):
     """
@@ -602,6 +637,12 @@ def __copy__(self):
         newobj = DataIO(data=self.data)
         return newobj

+    def append(self, arg):
+        self.__data = append_data(self.__data, arg)
+
+    def extend(self, arg):
+        self.__data = extend_data(self.__data, arg)
+
     def __deepcopy__(self, memo):
         """
         Define a custom copy method for deep copy.
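DataIO gains the same append/extend interface by delegating to the helpers, so wrapped data can keep growing before it is written. A minimal sketch (the values are illustrative):

from hdmf.data_utils import DataIO

dio = DataIO(data=[0, 1, 2])
dio.append(3)       # delegates to append_data on the wrapped list
dio.extend([4, 5])  # delegates to extend_data
print(dio.data)     # [0, 1, 2, 3, 4, 5]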
86 changes: 69 additions & 17 deletions tests/unit/common/test_table.py
@@ -1,7 +1,8 @@
 import unittest
-from hdmf.common import DynamicTable, VectorData, VectorIndex, ElementIdentifiers, DynamicTableRegion, VocabData
+from hdmf.common import DynamicTable, VectorData, VectorIndex, ElementIdentifiers, \
+    DynamicTableRegion, VocabData, get_manager
 from hdmf.testing import TestCase, H5RoundTripMixin
-from hdmf.backends.hdf5 import H5DataIO
+from hdmf.backends.hdf5 import H5DataIO, HDF5IO

 from collections import OrderedDict
 import h5py
@@ -543,7 +544,7 @@ def test_dynamic_table_region_to_dataframe(self):
     def test_dynamic_table_region_to_dataframe_exclude_cols(self):
         table = self.with_columns_and_data()
         dynamic_table_region = DynamicTableRegion('dtr', [0, 1, 2, 2], 'desc', table=table)
-        res = dynamic_table_region.to_dataframe(exclude=set(['baz', 'foo']))
+        res = dynamic_table_region.to_dataframe(exclude={'baz', 'foo'})
         self.assertListEqual(res.index.tolist(), [0, 1, 2, 2])
         self.assertEqual(len(res.columns), 1)
         self.assertListEqual(res['bar'].tolist(), [10.0, 20.0, 30.0, 30.0])
@@ -1180,6 +1181,45 @@ def setUpContainer(self):


+class TestDataIOColumns(H5RoundTripMixin, TestCase):
+    def setUpContainer(self):
+        self.chunked_data = H5DataIO(
+            data=[i for i in range(10)],
+            chunks=(3,),
+            fillvalue=-1,
+        )
+        self.compressed_data = H5DataIO(
+            data=np.arange(10),
+            compression=1,
+            shuffle=True,
+            fletcher32=True,
+            allow_plugin_filters=True,
+        )
+        foo = VectorData(name='foo', description='chunked column', data=self.chunked_data)
+        bar = VectorData(name='bar', description='chunked column', data=self.compressed_data)
+
+        # NOTE: on construct, columns are ordered such that indices go before data, so create the table that way
+        # for proper comparison of the columns list
+        table = DynamicTable('table0', 'an example table', columns=[foo, bar])
+        table.add_row(foo=1, bar=1)
+        return table
+
+    def test_roundtrip(self):
+        super().test_roundtrip()
+
+        with h5py.File(self.filename, 'r') as f:
+            chunked_dset = f['foo']
+            self.assertTrue(np.all(chunked_dset[:] == self.chunked_data.data))
+            self.assertEqual(chunked_dset.chunks, (3,))
+            self.assertEqual(chunked_dset.fillvalue, -1)
+
+            compressed_dset = f['bar']
+            self.assertTrue(np.all(compressed_dset[:] == self.compressed_data.data))
+            self.assertEqual(compressed_dset.compression, 'gzip')
+            self.assertEqual(compressed_dset.shuffle, True)
+            self.assertEqual(compressed_dset.fletcher32, True)
+
+
 class TestDataIOIndexedColumns(H5RoundTripMixin, TestCase):

     def setUpContainer(self):
         self.chunked_data = H5DataIO(
@@ -1202,6 +1242,10 @@ def setUpContainer(self):
         # NOTE: on construct, columns are ordered such that indices go before data, so create the table that way
         # for proper comparison of the columns list
         table = DynamicTable('table0', 'an example table', columns=[foo_ind, foo, bar_ind, bar])
+
+        # check for add_row
+        table.add_row(foo=np.arange(30).reshape(5, 2, 3), bar=np.arange(30).reshape(5, 2, 3))
+
         return table

     def test_roundtrip(self):
@@ -1227,25 +1271,29 @@ def setUpContainer(self):
             data=np.arange(30).reshape(5, 2, 3),
             chunks=(1, 1, 3),
             fillvalue=-1,
+            maxshape=(None, 2, 3)
         )
         self.chunked_index_data = H5DataIO(
             data=np.array([2, 3, 5], dtype=np.uint),
             chunks=(2, ),
             fillvalue=np.uint(10),
+            maxshape=(None,)
         )
         self.compressed_data = H5DataIO(
             data=np.arange(30).reshape(5, 2, 3),
             compression=1,
             shuffle=True,
             fletcher32=True,
             allow_plugin_filters=True,
+            maxshape=(None, 2, 3)
         )
         self.compressed_index_data = H5DataIO(
-            data=np.array([2, 3, 5], dtype=np.uint),
+            data=np.array([2, 4, 5], dtype=np.uint),
             compression=1,
             shuffle=True,
             fletcher32=False,
             allow_plugin_filters=True,
+            maxshape=(None,)
         )
         foo = VectorData(name='foo', description='chunked column', data=self.chunked_data)
         foo_ind = VectorIndex(name='foo_index', target=foo, data=self.chunked_index_data)
@@ -1254,20 +1302,24 @@ def setUpContainer(self):

         # NOTE: on construct, columns are ordered such that indices go before data, so create the table that way
         # for proper comparison of the columns list
-        table = DynamicTable('table0', 'an example table', columns=[foo_ind, foo, bar_ind, bar])
+        table = DynamicTable('table0', 'an example table', columns=[foo_ind, foo, bar_ind, bar],
+                             id=H5DataIO(data=[0, 1, 2], chunks=True, maxshape=(None,)))
+
+        # check for add_row
+        table.add_row(foo=np.arange(30).reshape(5, 2, 3),
+                      bar=np.arange(30).reshape(5, 2, 3))
+
         return table

-    def test_roundtrip(self):
-        super().test_roundtrip()
+    def test_append(self, cache_spec=False):
+        """Write the container to an HDF5 file, read the container from the file, and append to it."""
+        with HDF5IO(self.filename, manager=get_manager(), mode='w') as write_io:
+            write_io.write(self.container, cache_spec=cache_spec)

-        with h5py.File(self.filename, 'r') as f:
-            chunked_dset = f['foo_index']
-            self.assertTrue(np.all(chunked_dset[:] == self.chunked_index_data.data))
-            self.assertEqual(chunked_dset.chunks, (2, ))
-            self.assertEqual(chunked_dset.fillvalue, 10)
+        self.reader = HDF5IO(self.filename, manager=get_manager(), mode='a')
+        read_table = self.reader.read()

-            compressed_dset = f['bar_index']
-            self.assertTrue(np.all(compressed_dset[:] == self.compressed_index_data.data))
-            self.assertEqual(compressed_dset.compression, 'gzip')
-            self.assertEqual(compressed_dset.shuffle, True)
-            self.assertEqual(compressed_dset.fletcher32, False)
+        data = np.arange(30, 60).reshape(5, 2, 3)
+        read_table.add_row(foo=data, bar=data)
+
+        np.testing.assert_array_equal(read_table['foo'][-1], data)
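The maxshape arguments added to the H5DataIO calls above are what make these appends possible: an h5py.Dataset can only be resized along dimensions declared unlimited (None) at creation time. A standalone sketch of that constraint using plain h5py ('example.h5' is a hypothetical path):

import h5py
import numpy as np
from hdmf.data_utils import append_data

with h5py.File('example.h5', 'w') as f:
    # Without maxshape=(None,), the resize() inside append_data would raise.
    dset = f.create_dataset('col', data=np.arange(5), maxshape=(None,))
    dset = append_data(dset, 5)
    print(dset[:])  # [0 1 2 3 4 5]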
