Skip to content

Commit

Permalink
BUG: Fixing storage incompatibilities. (#30)
Browse files Browse the repository at this point in the history
  • Loading branch information
cfarrow committed Sep 13, 2017
1 parent fbf12c0 commit b07d3be
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 156 deletions.
163 changes: 55 additions & 108 deletions scripts/make_test_data.py
Original file line number Diff line number Diff line change
@@ -1,117 +1,64 @@
import argparse

import h5py

from sdafile.utils import write_header


def make_test_data(filename):
    """ Write an HDF5 test file containing five hand-built records.

    Every record is a group holding a single dataset with the same name as
    the group. The group carries the ``RecordType``, ``Empty``, ``Deflate``
    and ``Description`` attributes; the dataset repeats ``RecordType`` and
    ``Empty`` (plus ``Command`` for the function record).

    Parameters
    ----------
    filename : str
        Path of the file to create. It is opened with mode ``'w'``.

    """
    growable = (None, None)

    # One entry per record:
    # (label, record type, empty flag, description, command, dataset kwargs)
    # ``command`` is None for records without a 'Command' dataset attribute.
    record_specs = (
        (
            'curly', 'numeric', 'no', '3x4 array (double)', None,
            dict(shape=(3, 4), maxshape=growable, dtype='<f8',
                 chunks=(3, 4), compression=0, fillvalue=0.0),  # gzip
        ),
        (
            'curly 2', 'numeric', 'no', 'Single version of curly', None,
            dict(shape=(3, 4), maxshape=growable, dtype='<f4',
                 chunks=(3, 4), compression=0, fillvalue=0.0),
        ),
        (
            'larry', 'character', 'yes', 'An empty character array', None,
            dict(shape=(1, 1), maxshape=growable, dtype='<u1',
                 chunks=(1, 1), compression=0, fillvalue=0),
        ),
        (
            'moe', 'logical', 'no', 'A 1x1 logical array', None,
            dict(shape=(1, 1), maxshape=growable, dtype='<u1',
                 chunks=(1, 1), compression=0, fillvalue=0),
        ),
        (
            # The function record is fixed-size, unchunked and is created
            # without a compression keyword.
            'my function', 'function', 'no', 'Sine function', 'sin',
            dict(shape=(1, 10280), maxshape=(1, 10280), dtype='<u1',
                 chunks=None, fillvalue=0),
        ),
    )

    with h5py.File(filename, 'w') as h5file:

        # Header
        write_header(h5file.attrs)

        for label, rec_type, empty, description, command, ds_kw in record_specs:
            group = h5file.create_group(label)
            group.attrs['RecordType'] = rec_type
            group.attrs['Empty'] = empty
            group.attrs['Deflate'] = 0
            group.attrs['Description'] = description

            dataset = group.create_dataset(label, **ds_kw)
            dataset.attrs['RecordType'] = rec_type
            if command is not None:
                dataset.attrs['Command'] = command
            dataset.attrs['Empty'] = empty
import numpy as np
from scipy import sparse

from sdafile import SDAFile


# 1-D vector of five float64 zeros.
EXAMPLE_A1 = np.zeros((5,), dtype=np.float64)

# 4x3 complex128 matrix in which every element is 0 + 1j.
EXAMPLE_A2 = np.full((4, 3), 1j, dtype=np.complex128)

# 5x5 identity matrix stored in sparse COO format.
_IDENTITY_5 = sparse.eye(5)
EXAMPLE_A3 = _IDENTITY_5.tocoo()

# A single NaN.
EXAMPLE_A4 = np.nan


def make_example_data(filename):
    """ Create an SDA file at ``filename`` holding the example records.

    The records are inserted in a fixed order: the primitive examples
    A1-A4, B and C, followed by the composite examples E-H, which are built
    from the module-level ``EXAMPLE_*`` values.

    Parameters
    ----------
    filename : str
        Path handed to ``SDAFile`` together with mode ``'w'``.

    """
    struct_a1a2 = {"A1": EXAMPLE_A1, "A2": EXAMPLE_A2}
    struct_a3a4 = {"A3": EXAMPLE_A3, "A4": EXAMPLE_A4}

    def as_column(first, second):
        # Two objects stacked into a 2x1 object array.
        return np.array([first, second], dtype=object).reshape(2, 1)

    # (label, data, description), in insertion order.
    records = (
        ("example A1", EXAMPLE_A1, "5x1 zeros"),
        ("example A2", EXAMPLE_A2, "4x3 imaginary numbers"),
        ("example A3", EXAMPLE_A3, "5x5 sparse matrix"),
        ("example A4", np.nan, "Empty array"),
        ("example B", True, "Logical scalar"),
        ("example C", "Here is some text", "Some text"),
        ("example E", [EXAMPLE_A1, EXAMPLE_A2],
         "Cell array combining examples A1 and A2"),
        ("example F", struct_a1a2,
         "Structure combining examples A1 and A2"),
        ("example G", as_column(struct_a1a2, struct_a1a2),
         "Structure array combining examples A1 and A2 (repeated)"),
        ("example H", as_column(struct_a1a2, struct_a3a4),
         "Cell array of structures combining examples A1-A4"),
    )

    sda_file = SDAFile(filename, 'w')
    for label, data, description in records:
        sda_file.insert(label, data, description)


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    # Register the positional exactly once, as an optional one. A second,
    # plain registration of "filename" would be a required positional, so
    # the script could not be started without an argument and the default
    # below would never be used.
    parser.add_argument(
        "filename",
        help="The name of the file to create",
        nargs="?",
        default="SDAreference_py.sda",
    )

    args = parser.parse_args()
    # NOTE(review): both writers are given the same path and each opens it
    # with mode 'w' -- confirm whether the make_test_data call is still
    # wanted here.
    make_test_data(args.filename)
    make_example_data(args.filename)
19 changes: 7 additions & 12 deletions sdafile/sda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,6 @@ def _insert_composite_data(self, grp, deflate, record_type, data, extra):
raise ValueError(record_type)

attrs['Empty'] = 'yes' if nr == 0 else 'no'
set_encoded(grp.attrs, **attrs)

for label, sub_data in zip(labels, data):
sub_rec_type, sub_data, sub_extra = infer_record_type(sub_data)
Expand All @@ -576,21 +575,17 @@ def _insert_composite_data(self, grp, deflate, record_type, data, extra):
sub_grp, deflate, sub_rec_type, sub_data, sub_extra
)

# Do this last because primitive sub-records can modify the Empty
# attribute.
set_encoded(grp.attrs, **attrs)

def _insert_primitive_data(self, grp, label, deflate, record_type, data,
extra):
""" Prepare primitive data for storage and store it. """
data, original_shape = coerce_primitive(record_type, data, extra)
empty = 'no'
if np.isscalar(data) or data.shape == ():
compression = None
maxshape = None
if np.isnan(data):
empty = 'yes'
else:
compression = deflate
maxshape = (None,) * data.ndim
if np.squeeze(data).shape == (0,):
empty = 'yes'
empty = 'yes' if (np.isnan(data).all() or data.size == 0) else 'no'
compression = deflate
maxshape = (None,) * data.ndim

with self._h5file('r+') as h5file:
set_encoded(
Expand Down
1 change: 1 addition & 0 deletions sdafile/tests/test_sda_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ def test_cell(self):
record_size = np.atleast_2d(objs).shape
else:
record_size = (1, len(objs))

self.assertCompositeRecord(
sda_file,
label,
Expand Down
21 changes: 7 additions & 14 deletions sdafile/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unittest

import numpy as np
from numpy.testing import assert_array_equal
from numpy.testing import assert_array_equal, assert_equal
from scipy.sparse import coo_matrix

from sdafile.exceptions import BadSDAFile
Expand Down Expand Up @@ -64,7 +64,7 @@ def test_unnest(self):
def test_coerce_character(self):
coerced = coerce_character(string.printable)
self.assertEqual(coerced.dtype, np.dtype(np.uint8))
expected = np.array([ord(c) for c in string.printable], np.uint8)
expected = np.array([[ord(c) for c in string.printable]], np.uint8)
assert_array_equal(coerced, expected)

def test_coerce_primitive(self):
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_coerce_logical(self):
x = np.array([True, False, True, True])
coerced = coerce_logical(x)
self.assertEqual(coerced.dtype, np.dtype(np.uint8))
assert_array_equal(coerced, [1, 0, 1, 1])
assert_array_equal(coerced, [[1, 0, 1, 1]])

def test_coerce_numeric(self):
for data, typ in TEST_SCALARS:
Expand All @@ -118,8 +118,7 @@ def test_coerce_numeric(self):
for data, typ in TEST_ARRAYS:
if typ == 'numeric' and isinstance(data, np.ndarray):
coerced = coerce_numeric(data)
assert_array_equal(coerced, data)
self.assertEqual(coerced.dtype, data.dtype)
assert_array_equal(coerced, np.atleast_2d(data))

def test_coerce_sparse(self):
row = np.array([3, 4, 5, 6])
Expand Down Expand Up @@ -216,21 +215,15 @@ def test_extract_logical(self):
self.assertEqual(extract_logical(0), False)

expected = np.array([True, False, True, True], dtype=bool)
stored = np.array([1, 0, 1, 1], dtype=np.uint8)
stored = np.array([[1, 0, 1, 1]], dtype=np.uint8)
extracted = extract_logical(stored)
self.assertEqual(extracted.dtype, expected.dtype)
assert_array_equal(extracted, expected)

def test_extract_numeric(self):
for data, typ in TEST_SCALARS:
for data, typ in TEST_SCALARS + TEST_ARRAYS:
if typ == 'numeric':
self.assertEqual(extract_numeric(data), data)

for data, typ in TEST_ARRAYS:
if typ == 'numeric' and isinstance(data, np.ndarray):
extracted = extract_numeric(data)
assert_array_equal(extracted, data)
self.assertEqual(data.dtype, extracted.dtype)
assert_equal(extract_numeric(coerce_numeric(data)), data)

def test_extract_sparse(self):
row = np.array([0, 2])
Expand Down
47 changes: 25 additions & 22 deletions sdafile/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def coerce_character(data):
"""
data = np.frombuffer(data.encode('ascii'), np.uint8)
return data
return np.atleast_2d(data)


def coerce_complex(data):
Expand All @@ -151,7 +151,7 @@ def coerce_complex(data):
and type float64 if the input type is complex128 (or equivalent).
"""
data = np.asarray(data).ravel(order='F')
data = np.atleast_2d(data).ravel(order='F')
return np.array([data.real, data.imag])


Expand All @@ -174,7 +174,7 @@ def coerce_logical(data):
data = np.uint8(1 if data else 0)
else:
data = data.astype(np.uint8).clip(0, 1)
return data
return np.atleast_2d(data)


def coerce_numeric(data):
Expand All @@ -187,11 +187,11 @@ def coerce_numeric(data):
Returns
-------
data : array-like or scalar
This function does not modify the input data.
coerced : array-like or scalar
The data with at least 2 dimensions
"""
return data
return np.atleast_2d(data)


def coerce_sparse(data):
Expand Down Expand Up @@ -314,13 +314,6 @@ def extract_primitive(record_type, data, data_attrs):
extracted = extract_complex(data, shape.astype(int))
else:
extracted = extract_numeric(data)
# squeeze leading dimension if this is a MATLAB row array
if extracted.ndim == 2 and extracted.shape[0] == 1:
# if it's a scalar, go all the way
if extracted.shape[1] == 1:
extracted = extracted[0, 0]
else:
extracted = np.squeeze(extracted, axis=0)
elif record_type == 'logical':
extracted = extract_logical(data)
elif record_type == 'character':
Expand Down Expand Up @@ -365,28 +358,26 @@ def extract_complex(data, shape):
"""
extracted = 1j * data[1]
extracted.real = data[0]
return extracted.reshape(shape, order='F')
extracted = extracted.reshape(shape, order='F')
return reduce_array(extracted)


def extract_logical(data):
    """ Extract 'logical' data from uint8 stored form.

    Parameters
    ----------
    data : ndarray or scalar
        Array or scalar of uint8 values clipped to 0 or 1

    Returns
    -------
    extracted :
        The extracted boolean or boolean array. Array input is converted
        to ``bool`` and then passed through ``reduce_array``.

    """
    if np.isscalar(data):
        return bool(data)
    # Previously this conversion sat behind an unconditional return and
    # never ran, so array results were handed back without being reduced.
    return reduce_array(np.asarray(data, dtype=bool))


def extract_numeric(data):
Expand All @@ -403,7 +394,7 @@ def extract_numeric(data):
The input data
"""
return data
return reduce_array(data)


def extract_sparse(data):
Expand Down Expand Up @@ -678,6 +669,18 @@ def get_decoded(dict_like, *attrs):
}


def reduce_array(arr):
    """ Reduce a 2d row-array or scalar to 1 or 0 dimensions, respectively.

    Parameters
    ----------
    arr : ndarray or scalar
        The value to reduce.

    Returns
    -------
    reduced :
        For input of shape ``(1, 1)``, the single element. For input of
        shape ``(1, n)``, the array with its leading axis removed. Anything
        else, including objects that have no ``ndim`` attribute such as
        plain Python scalars, is returned unchanged.

    """
    # Plain Python scalars carry no ``ndim``; hand them back untouched
    # instead of failing on the attribute lookup.
    if getattr(arr, 'ndim', None) != 2 or arr.shape[0] != 1:
        return arr

    # A 1x1 array is a scalar, so go all the way down to the element.
    if arr.shape[1] == 1:
        return arr[0, 0]

    # Otherwise squeeze the leading dimension of the MATLAB row array.
    return np.squeeze(arr, axis=0)


def unnest(data):
""" Provide paths for structure mappings. """
items = [('', data)]
Expand Down

0 comments on commit b07d3be

Please sign in to comment.