Skip to content

Commit

Permalink
DEV/TEST: ~3x h5_to_cube speed enhancement
Browse files Browse the repository at this point in the history
DEV:

* Change _exp_format so that it only inserts the precision into the
  substitution (format) string, instead of both inserting the precision
  and formatting the value. This avoids repeating the precision insertion
  *millions* of times, since the format string only needs to be
  constructed once.
* Change the data processing segment of h5_to_cube so that it iterates
  over each (x, y) block of values (the z and dataset-number dimensions
  are flattened and handled as one block). Values are still output in
  rows of six, and a newline is included after each (x, y) block.

TEST:

It was necessary to adjust the test suite for _exp_format to accommodate
the changed call structure.
  • Loading branch information
bskinn committed Dec 9, 2016
1 parent 4e76ec6 commit 300df11
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 22 deletions.
52 changes: 32 additions & 20 deletions h5cube/h5cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class EXIT(object):
FILEREAD = 4
FILEWRITE = 8

def _exp_format(val, prec):
def _exp_format(prec): #val, prec):
""" [Docstring]
"""
Expand All @@ -75,9 +75,9 @@ def _exp_format(val, prec):
# positive values with another leading space; negatives with the negative
# sign; one digit in front of the decimal, 'prec' digits after.
# Capital 'E' for the exponent.
out = " {{: #1.{0}E}}".format(prec).format(val)
out = " {{: #1.{0}E}}".format(prec)

# Return the results
# Return the result
return out

def _trynext(iterator, msg):
Expand Down Expand Up @@ -409,7 +409,7 @@ def h5_to_cube(h5path, *, delsrc=DEF.DEL, prec=DEF.PREC):
f.write('\n')

# Write the data blocks
# Pull them from the .h5cube file first
# Pull the entire dataset from the .h5cube file first
# Value-by-value data retrieval was tried and found to be
# HORRIFICALLY slow. Chunk-by-chunk retrieval might be better
# speed-wise, but appears to decrease the .h5cube compression
Expand All @@ -418,22 +418,34 @@ def h5_to_cube(h5path, *, delsrc=DEF.DEL, prec=DEF.PREC):
logvals = hf[H5.LOGDATA].value
outvals = np.multiply(signs, 10.0**logvals)

# Can just run a combinatorial iterator over the dimensions
# of the dataset
for i, t in enumerate(itt.product(*map(range, dims))):
# f.write(_exp_format(hf[H5.SIGNS].value[t] *
# 10.**hf[H5.LOGDATA].value[t], prec))
# f.write(_exp_format(signs[t] * 10. ** logvals[t], prec))
f.write(_exp_format(outvals[t], prec))

# Newline to wrap at a max of six values per line, or if at
# the last entry of a z-iteration and at the last dataset,
# for orbital files.
if i % 6 == 5 or (t[2] == dims[2] - 1 and
t[-1] == dims[-1] - 1):
f.write('\n')

# Always newlines at end
# Pre-fetch the format string
num_format = _exp_format(prec)

# Can just run a combinatorial iterator over the first two
# dimensions of the dataset
for gt in itt.product(*map(range, dims[:2])):
# Reset the output accumulator
outstr = ""

# Loop over all the data in each submatrix of the first
# two dimensions, flattening for straightforward
# iteration and converting to list since .format()
# goes appreciably faster when operating on native
# Python floats, rather than numpy types.
for i, v in enumerate(outvals[gt].flatten().tolist()):
# Append the value to the accumulator string. +=
# performed modestly better than .join().
outstr += num_format.format(v)

if i % 6 == 5:
outstr += '\n'

if not outstr.endswith('\n'):
outstr += '\n'

f.write(outstr)

# Always newlines at end of file
f.write('\n\n')

# If indicated, delete the source file
Expand Down
10 changes: 8 additions & 2 deletions h5cube/test/h5cube_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ def setUpClass(cls):

def test_FxnMisc_ExpFormat_Good(self):
""" Validate correct scientific notation formatting for decompression """
from h5cube.h5cube import _exp_format as _ef
from h5cube.h5cube import _exp_format as _ef_fxn

def _ef(val, prec):
return _ef_fxn(prec).format(val)

with self.subTest(type='typical'):
self.assertEqual(_ef(0.0183, 5), " 1.83000E-02")
Expand All @@ -38,7 +41,10 @@ def test_FxnMisc_ExpFormat_Good(self):

def test_FxnMisc_ExpFormat_Bad(self):
""" Confirm _exp_format breaks when bad arguments are passed """
from h5cube.h5cube import _exp_format as _ef
from h5cube.h5cube import _exp_format as _ef_fxn

def _ef(val, prec):
return _ef_fxn(prec).format(val)

with self.subTest(type='string'):
self.assertRaises(ValueError, _ef, "abcd", 5)
Expand Down

0 comments on commit 300df11

Please sign in to comment.