Skip to content

Commit

Permalink
ENH: Remove storage overhead of removing a record (#25)
Browse files Browse the repository at this point in the history
* ENH: Remove space overhead of removing a record

* BUG: Update the timestamp after copying data.

* TST: Extend 'remove' test to all supported data.

* MAINT: Fixing some typos
  • Loading branch information
cfarrow committed Sep 12, 2017
1 parent a88fec7 commit f94ce81
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 7 deletions.
48 changes: 43 additions & 5 deletions sda/sda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
"""

from contextlib import contextmanager
import os
import os.path as op
import re
import shutil
import tempfile

import h5py
import numpy as np
Expand All @@ -28,7 +31,7 @@
class SDAFile(object):
""" Read, write, inspect, and manipulate Sandia Data Archive files.
This supports version 1.1 of the Sandai Data Archive format.
This supports version 1.1 of the Sandia Data Archive format.
"""

Expand Down Expand Up @@ -282,10 +285,45 @@ def remove(self, *labels):

self._validate_labels(labels, must_exist=True)

with self._h5file('a') as h5file:
for label in labels:
del h5file[label]
update_header(h5file.attrs)
# Create a new file so space is actually freed
labels = set(labels)

def _copy_visitor(path):
    """ Visitor that copies one object from source to destination.

    Intended as the callback for ``source.visit``. It uses ``source``,
    ``destination`` and ``labels`` from the enclosing scope.

    Parameters
    ----------
    path : str
        Path of the visited group or dataset, relative to the root of
        ``source``.

    Returns
    -------
    None
        Always, so the traversal is never cut short.

    """

    # Skip paths corresponding to excluded labels. The first path
    # component is the top-level label, so everything nested under a
    # removed label is skipped as well.
    if path.split('/')[0] in labels:
        return

    # Copy everything else
    source_obj = source[path]
    if isinstance(source_obj, h5py.Group):
        destination_obj = destination.create_group(path)
    else:
        # Recreate the dataset with the same storage settings as the
        # original.
        destination_obj = destination.create_dataset(
            path,
            data=source_obj[()],
            chunks=source_obj.chunks,
            maxshape=source_obj.maxshape,
            compression=source_obj.compression,
            compression_opts=source_obj.compression_opts,
            scaleoffset=source_obj.scaleoffset,
            shuffle=source_obj.shuffle,
            fletcher32=source_obj.fletcher32,
            fillvalue=source_obj.fillvalue,
        )

    # Carry over the attributes of the visited object itself. The
    # file-level attributes are copied once by the caller before the
    # traversal starts, so they are not repeated here.
    destination_obj.attrs.update(source_obj.attrs)

pid, destination_path = tempfile.mkstemp()
os.close(pid)
destination = h5py.File(destination_path, 'w')
with self._h5file('r') as source:
destination.attrs.update(source.attrs)
source.visit(_copy_visitor)
update_header(destination.attrs)
destination.close()
shutil.move(destination_path, self._filename)

def probe(self, pattern=None):
""" Summarize the state of the archive
Expand Down
9 changes: 7 additions & 2 deletions sda/tests/test_sda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,13 +671,18 @@ def test_labels(self):
sda_file.insert('l1', [1])
self.assertEqual(sorted(sda_file.labels()), ['l0', 'l1'])

def test_delete(self):
def test_remove(self):
with temporary_file() as file_path:
sda_file = SDAFile(file_path, 'w')

labels = []

for i, (obj, _) in enumerate(TEST_SCALARS + TEST_ARRAYS):
ALL = (
[obj for obj, _ in TEST_ARRAYS + TEST_SCALARS] + TEST_CELL +
TEST_SPARSE + TEST_SPARSE_COMPLEX + TEST_STRUCTURE
)

for i, obj in enumerate(ALL):
label = 'test' + str(i)
labels.append(label)
sda_file.insert(label, obj)
Expand Down

0 comments on commit f94ce81

Please sign in to comment.