Skip to content

Commit

Permalink
Better py2/py3 compat with string variables
Browse files Browse the repository at this point in the history
  • Loading branch information
kwilcox committed Sep 14, 2016
1 parent 592e3eb commit c96c860
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 58 deletions.
12 changes: 4 additions & 8 deletions pyaxiom/netcdf/sensors/dsg/profile/im.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,10 @@ def is_mine(cls, dsg):

# Allow for string variables
pvar = pvars[0]
# This differentiates between this and an OrthogonalMultidimensionalProfile
# Incomplete files need to have a profile dimension (at least 1 dim).
minimum_dimensions = 1
maximum_dimensions = 2
if np.issubdtype(pvar.dtype, 'S'):
minimum_dimensions += 1
maximum_dimensions += 1
assert minimum_dimensions <= len(pvar.dimensions) <= maximum_dimensions
# 0 = single
# 1 = array of strings/ints/bytes/etc
# 2 = array of character arrays
assert 0 <= len(pvar.dimensions) <= 2

t = dsg.t_axes()[0]
x = dsg.x_axes()[0]
Expand Down
35 changes: 9 additions & 26 deletions pyaxiom/netcdf/sensors/dsg/profile/om.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,13 @@ def is_mine(cls, dsg):

# Allow for string variables
pvar = pvars[0]
minimum_dimensions = 0
maximum_dimensions = 1
if np.issubdtype(pvar.dtype, 'S'):
minimum_dimensions += 1
maximum_dimensions += 1
assert minimum_dimensions <= len(pvar.dimensions) <= maximum_dimensions
# 0 = single
# 1 = array of strings/ints/bytes/etc
# 2 = array of character arrays
assert 0 <= len(pvar.dimensions) <= 2

is_single_profile = False
if len(pvar.dimensions) == minimum_dimensions:
is_single_profile = True
ps = normalize_array(pvar)
is_single = ps.size == 1

t = dsg.t_axes()[0]
x = dsg.x_axes()[0]
Expand All @@ -57,7 +54,7 @@ def is_mine(cls, dsg):
assert len(z.dimensions) == 1
z_dim = dsg.dimensions[z.dimensions[0]]

if is_single_profile:
if is_single:
assert t.size == 1
assert x.size == 1
assert y.size == 1
Expand Down Expand Up @@ -127,31 +124,17 @@ def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_r
)

def to_dataframe(self, clean_cols=True, clean_rows=True):
pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]

minimum_dimensions = 0
if np.issubdtype(pvar.dtype, 'S'):
minimum_dimensions += 1
if len(pvar.dimensions) == minimum_dimensions:
# Single profile
ps = 1
else:
try:
# Multiple profiles in the file
ps = len(self.dimensions[pvar.dimensions[0]])
except IndexError:
# Single profile in the file
ps = 1
logger.debug(['# profiles: ', ps])

zvar = self.z_axes()[0]
zs = len(self.dimensions[zvar.dimensions[0]])

# Profiles
pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]
try:
p = normalize_array(pvar)
except ValueError:
p = np.asarray(list(range(len(pvar))), dtype=np.integer)
ps = p.size
p = p.repeat(zs)
logger.debug(['profile data size: ', p.size])

Expand Down
17 changes: 7 additions & 10 deletions pyaxiom/netcdf/sensors/dsg/trajectory/im.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,13 @@ def is_mine(cls, dsg):

# Allow for string variables
tvar = tvars[0]
minimum_dimensions = 0
maximum_dimensions = 1
if np.issubdtype(tvar.dtype, 'S'):
minimum_dimensions += 1
maximum_dimensions += 1
assert minimum_dimensions <= len(tvar.dimensions) <= maximum_dimensions
# 0 = single
# 1 = array of strings/ints/bytes/etc
# 2 = array of character arrays
assert 0 <= len(tvar.dimensions) <= 2

is_single_trajectory = False
if len(tvar.dimensions) == minimum_dimensions:
is_single_trajectory = True
ts = normalize_array(tvar)
is_single = ts.size == 1

t = dsg.t_axes()[0]
x = dsg.x_axes()[0]
Expand All @@ -59,7 +56,7 @@ def is_mine(cls, dsg):
assert t.dimensions == x.dimensions == y.dimensions == z.dimensions
assert t.size == x.size == y.size == z.size

if is_single_trajectory:
if is_single:
assert len(t.dimensions) == 1
time_dim = dsg.dimensions[t.dimensions[0]]
for dv in dsg.data_vars():
Expand Down
14 changes: 6 additions & 8 deletions pyaxiom/netcdf/sensors/dsg/trajectoryProfile/cr.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,10 @@ def is_mine(cls, dsg):

# Allow for string variables
rvar = rvars[0]
minimum_dimensions = 0
maximum_dimensions = 1
if np.issubdtype(rvar.dtype, 'S'):
minimum_dimensions += 1
maximum_dimensions += 1
assert minimum_dimensions <= len(rvar.dimensions) <= maximum_dimensions
# 0 = single
# 1 = array of strings/ints/bytes/etc
# 2 = array of character arrays
assert 0 <= len(rvar.dimensions) <= 2

except BaseException:
return False
Expand Down Expand Up @@ -170,7 +168,7 @@ def to_dataframe(self, clean_cols=True, clean_rows=True):

p = np.empty(0, dtype=profile_indexes.dtype)
r = np.empty(0, dtype=traj_indexes.dtype)
t = np.empty(0, dtype=tvar.dtype)
t = np.empty(0, dtype=tvar.dtype)
x = np.empty(0, dtype=xvar.dtype)
y = np.empty(0, dtype=yvar.dtype)
for i in range(profile_indexes.size):
Expand All @@ -188,7 +186,7 @@ def to_dataframe(self, clean_cols=True, clean_rows=True):
d = np.ma.fix_invalid(np.ma.MaskedArray(np.cumsum(d)).astype(np.float64).round(2))

# Sample dimension

z = np.ma.fix_invalid(np.ma.MaskedArray(zvar[:].astype(np.float64)))
z = z.flatten().round(5)

Expand Down
2 changes: 1 addition & 1 deletion pyaxiom/tests/dsg/profile/test_profile_om.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_omp_calculated_metadata(self):
assert np.isclose(m.profiles[37].y, 60.01)

def test_json_attributes(self):
ds = os.path.join(os.path.dirname(__file__), 'resources', '1dy11.original.nc')
ds = os.path.join(os.path.dirname(__file__), 'resources', 'om-1dy11.nc')
om = OrthogonalMultidimensionalProfile(ds)
om.to_dataframe()
om.json_attributes()
1 change: 1 addition & 0 deletions pyaxiom/tests/dsg/test_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
@pytest.mark.parametrize("klass,fp", [
(OrthogonalMultidimensionalProfile, os.path.join(os.path.dirname(__file__), 'profile', 'resources', 'om-single.nc')),
(OrthogonalMultidimensionalProfile, os.path.join(os.path.dirname(__file__), 'profile', 'resources', 'om-multiple.nc')),
(OrthogonalMultidimensionalProfile, os.path.join(os.path.dirname(__file__), 'profile', 'resources', 'om-1dy11.nc')),
(IncompleteMultidimensionalProfile, os.path.join(os.path.dirname(__file__), 'profile', 'resources', 'im-multiple.nc')),
(IncompleteMultidimensionalTrajectory, os.path.join(os.path.dirname(__file__), 'trajectory', 'resources', 'im-single.nc')),
(IncompleteMultidimensionalTrajectory, os.path.join(os.path.dirname(__file__), 'trajectory', 'resources', 'im-multiple.nc')),
Expand Down
4 changes: 4 additions & 0 deletions pyaxiom/tests/dsg/trajectory/test_trajectory_im.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ def test_crt_calculated_metadata(self):
assert m.trajectories["Trajectory3"].max_t == dtparse('1990-01-02 12:00:00')
assert m.trajectories["Trajectory3"].first_loc.x == -73.3026
assert m.trajectories["Trajectory3"].first_loc.y == 1.95761

def test_json_attributes(self):
IncompleteMultidimensionalTrajectory(self.single).json_attributes()
IncompleteMultidimensionalTrajectory(self.multi).json_attributes()
16 changes: 11 additions & 5 deletions pyaxiom/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import simplejson as json

import numpy as np
import netCDF4 as nc4

from pyaxiom.urn import IoosUrn
from pyaxiom import logger
Expand Down Expand Up @@ -50,11 +51,16 @@ def normalize_array(var):
used to normalize string types between py2 and py3. It has no effect on types
other than chars/strings
"""
if np.issubdtype(var.dtype, 'S'):
if len(var.dimensions) == 1:
return np.asarray([var[:].tostring().decode('utf-8')])
else:
return np.asarray([ s.tostring().decode('utf-8') for s in var[:] ])
if np.issubdtype(var.dtype, 'S1'):
if var.dtype == str:
# Python 2 on netCDF4 'string' variables needs this.
# Python 3 returns false for np.issubdtype(var.dtype, 'S1')
return var[:]

def decoder(x):
return str(x.decode('utf-8'))
vfunc = np.vectorize(decoder)
return vfunc(nc4.chartostring(var[:]))
else:
return var[:]

Expand Down

0 comments on commit c96c860

Please sign in to comment.