Skip to content

Commit

Permalink
add multi-index code and test (#430)
Browse files Browse the repository at this point in the history
* Add multi-index code and test.
* Fix VectorIndex.add_vector and DynamicTable.add_column for nested indexing.
* Update test_table.py.
* Add documentation and a test for automatic column generation.

Co-authored-by: Ryan Ly <rly@lbl.gov>
  • Loading branch information
bendichter and rly committed Oct 13, 2020
1 parent abc291c commit 31a0fd1
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 10 deletions.
45 changes: 35 additions & 10 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ def add_vector(self, arg):
Add the given data value to the target VectorData and append the corresponding index to this VectorIndex
:param arg: The data value to be added to self.target
"""
self.target.extend(arg)
if isinstance(self.target, VectorIndex):
for a in arg:
self.target.add_vector(a)
else:
self.target.extend(arg)
self.append(self.__check_precision(len(self.target)))

def __check_precision(self, idx):
Expand Down Expand Up @@ -557,8 +561,13 @@ def __eq__(self, other):
'doc': 'a dataset where the first dimension is a concatenation of multiple vectors', 'default': list()},
{'name': 'table', 'type': (bool, 'DynamicTable'),
'doc': 'whether or not this is a table region or the table the region applies to', 'default': False},
{'name': 'index', 'type': (bool, VectorIndex, 'array_data'),
'doc': 'whether or not this column should be indexed', 'default': False},
{'name': 'index', 'type': (bool, VectorIndex, 'array_data', int),
'doc': 'False (default): do not generate a VectorIndex \n'
'True: generate one empty VectorIndex \n'
'VectorIndex: Use the supplied VectorIndex \n'
'array-like of ints: Create a VectorIndex and use these values as the data \n'
'int: Recursively create `n` VectorIndex objects for a multi-ragged array \n',
'default': False},
{'name': 'vocab', 'type': (bool, 'array_data'), 'default': False,
'doc': ('whether or not this column contains data from a '
'controlled vocabulary or the controlled vocabulary')},
Expand Down Expand Up @@ -642,23 +651,30 @@ def add_column(self, **kwargs): # noqa: C901
if index is not False:
if isinstance(index, VectorIndex):
col_index = index
self.__add_column_index_helper(col_index)
elif isinstance(index, bool): # make empty VectorIndex
if len(col) > 0:
raise ValueError("cannot pass empty index with non-empty data to index")
col_index = VectorIndex(name + "_index", list(), col)
self.__add_column_index_helper(col_index)
elif isinstance(index, int):
assert index > 0, ValueError("integer index value must be greater than 0")
assert len(col) == 0, ValueError("cannot pass empty index with non-empty data to index")
index_name = name
for i in range(index):
index_name = index_name + "_index"
col_index = VectorIndex(index_name, list(), col)
self.__add_column_index_helper(col_index)
if i < index - 1:
columns.insert(0, col_index)
col = col_index
else: # make VectorIndex with supplied data
if len(col) == 0:
raise ValueError("cannot pass non-empty index with empty data to index")
col_index = VectorIndex(name + "_index", index, col)
self.__add_column_index_helper(col_index)
columns.insert(0, col_index)
if not isinstance(col_index.parent, Container):
col_index.parent = self
# else, the ObjectMapper will create a link from self (parent) to col_index (child with existing parent)
col = col_index
self.__indices[col_index.name] = col_index
self.__set_table_attr(col_index)
if col_index in self.__uninit_cols:
self.__uninit_cols.pop(col_index)

if len(col) != len(self.id):
raise ValueError("column must have the same number of rows as 'id'")
Expand All @@ -667,6 +683,15 @@ def add_column(self, **kwargs): # noqa: C901
self.fields['columns'] = tuple(list(self.columns)+columns)
self.__df_cols.append(col)

def __add_column_index_helper(self, col_index):
    """Register an index column with this table.

    Makes this table the parent of ``col_index`` unless its current parent is already a
    ``Container``, stores it in the index mapping under its own name, passes it to
    ``__set_table_attr``, and drops it from the uninitialized columns if it is listed there.

    :param col_index: the VectorIndex to register
    """
    has_container_parent = isinstance(col_index.parent, Container)
    if not has_container_parent:
        # otherwise, the ObjectMapper will create a link from self (parent)
        # to col_index (child with existing parent)
        col_index.parent = self

    self.__indices[col_index.name] = col_index
    self.__set_table_attr(col_index)

    pending = self.__uninit_cols
    if col_index in pending:
        pending.pop(col_index)

@docval({'name': 'name', 'type': str, 'doc': 'the name of the DynamicTableRegion object'},
{'name': 'region', 'type': (slice, list, tuple), 'doc': 'the indices of the table'},
{'name': 'description', 'type': str, 'doc': 'a brief description of what the region is'})
Expand Down
58 changes: 58 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,46 @@ def test_add_column_vectorindex(self):
with self.assertWarnsWith(FutureWarning, msg):
table.add_column(name='bad', description='bad column', index=ind)

def test_add_column_multi_index(self):
    """Rows holding lists of lists can be added to a column created with ``index=2``."""
    table = self.with_spec()
    table.add_column(name='qux', description='qux column', index=2)

    # the non-nested columns receive the same values in both rows
    shared_values = dict(foo=5, bar=50.0, baz='lizard')
    nested_rows = (
        [[1, 2, 3], [1, 2, 3, 4]],
        [[1, 2]],
    )
    for nested in nested_rows:
        table.add_row(**shared_values, qux=nested)

def test_auto_multi_index(self):
    """A column declared in ``__columns__`` with ``index=2`` stores and yields doubly-nested rows."""

    class TestTable(DynamicTable):
        __columns__ = (
            {'name': 'qux', 'description': 'qux column', 'index': 2},
        )

    table = TestTable('table_name', 'table_description')

    first_row = [[1, 2, 3], [1, 2, 3, 4]]
    second_row = [[1, 2]]
    table.add_row(qux=first_row)
    table.add_row(qux=second_row)

    # expected contents are spelled out separately from the inputs
    expected = [
        [[1, 2, 3], [1, 2, 3, 4]],
        [[1, 2]],
    ]
    np.testing.assert_array_equal(table['qux'][:], expected)

def test_getitem_row_num(self):
table = self.with_spec()
self.add_rows(table)
Expand Down Expand Up @@ -1082,6 +1122,24 @@ def test_index(self):
self.assertListEqual(foo_ind_ind[0], [['a11', 'a12'], ['a21']])
self.assertListEqual(foo_ind_ind[1], [['b11']])

def test_add_vector(self):
    """Adding a nested vector through a doubly-indexed VectorIndex updates every level."""
    # starting layout, read from the end offsets below:
    #   first outer entry  -> [['a11', 'a12'], ['a21']]
    #   second outer entry -> [['b11']]
    values = VectorData(name='foo', description='foo column', data=['a11', 'a12', 'a21', 'b11'])
    inner = VectorIndex(name='foo_index', target=values, data=[2, 3, 4])
    outer = VectorIndex(name='foo_index_index', target=inner, data=[2, 3])

    addition = [['c11', 'c12', 'c13'], ['c21', 'c22']]
    outer.add_vector(addition)

    # the flat data gains all five new values
    self.assertListEqual(values.data, ['a11', 'a12', 'a21', 'b11', 'c11', 'c12', 'c13', 'c21', 'c22'])

    # the inner index gains one end offset per new sub-vector
    self.assertListEqual(inner.data, [2, 3, 4, 7, 9])
    self.assertListEqual(inner[3], ['c11', 'c12', 'c13'])
    self.assertListEqual(inner[4], ['c21', 'c22'])

    # the outer index gains a single end offset covering both new sub-vectors
    self.assertListEqual(outer.data, [2, 3, 5])
    self.assertListEqual(outer[2], [['c11', 'c12', 'c13'], ['c21', 'c22']])


class TestDTDoubleIndex(TestCase):

Expand Down

0 comments on commit 31a0fd1

Please sign in to comment.