Skip to content

Commit

Permalink
FIXTEST: Fixing various bugs to make ?all? the unit tests pass.
Browse files Browse the repository at this point in the history
* WHen index_type is slice, the interpolation manager should not over-write it to be np.ndarray again -- fixed
* In NearestNeightborInterpolator
    * the bounds attr was stripped out of the datarray when removing nans -- fixed
    * For stacked coords, when respecting bounds, the 'any' call collapsed the whole array, should have only been along the column.
* In the Selector, all testing was done one square uniform coordinates, but this causes errors when stacking coordinates in one case -- fixed
    * Added a test for this to avoid problem in the future
  • Loading branch information
mpu-creare committed Nov 16, 2020
1 parent 4fa60c9 commit 2c5640c
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 24 deletions.
7 changes: 5 additions & 2 deletions podpac/core/interpolation/interpolation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,11 +479,14 @@ def select_coordinates(self, source_coordinates, eval_coordinates, index_type="n
if d not in selected_coords: # Some coordinates may not have a selector when heterogeneous
selected_coords[d] = source_coordinates[d]
# np.ix_ call doesn't work with slices, and fancy numpy indexing does not work well with mixed slice/index
if isinstance(selected_coords_idx[d], slice):
if isinstance(selected_coords_idx[d], slice) and index_type != "slice":
selected_coords_idx[d] = np.arange(selected_coords[d].size)

selected_coords = Coordinates([selected_coords[k] for k in source_coordinates.dims], source_coordinates.dims)
selected_coords_idx2 = np.ix_(*[selected_coords_idx[k].ravel() for k in source_coordinates.dims])
if index_type != "slice":
selected_coords_idx2 = np.ix_(*[selected_coords_idx[k].ravel() for k in source_coordinates.dims])
else:
selected_coords_idx2 = tuple([selected_coords_idx[d] for d in source_coordinates.dims])
return selected_coords, tuple(selected_coords_idx2)

def interpolate(self, source_coordinates, source_data, eval_coordinates, output_data):
Expand Down
22 changes: 13 additions & 9 deletions podpac/core/interpolation/nearest_neighbor_interpolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from six import string_types

import numpy as np
import xarray as xr
import traitlets as tl
from scipy.spatial import cKDTree

Expand Down Expand Up @@ -68,9 +69,6 @@ def interpolate(self, udims, source_coordinates, source_data, eval_coordinates,
"""
# Note, some of the following code duplicates code in the Selector class.
# This duplication is for the sake of optimization
if self.remove_nan:
# Eliminate nans from the source data. Note, this could turn a uniform griddted dataset into a stacked one
source_data, source_coordinates = self._remove_nans(source_data, source_coordinates)

def is_stacked(d):
return "_" in d
Expand All @@ -82,6 +80,12 @@ def is_stacked(d):
self._atime_to_float(b, source_coordinates["time"], eval_coordinates["time"])
for b in bounds["time"]
]
else:
bounds = {d: None for d in source_coordinates.dims}

if self.remove_nan:
# Eliminate nans from the source data. Note, this could turn a uniform griddted dataset into a stacked one
source_data, source_coordinates = self._remove_nans(source_data, source_coordinates)

data_index = []
for d in source_coordinates.dims:
Expand Down Expand Up @@ -116,7 +120,7 @@ def is_stacked(d):
return output_data

def _remove_nans(self, source_data, source_coordinates):
index = np.isnan(source_data)
index = np.array(np.isnan(source_data), bool)
if not np.any(index):
return source_data, source_coordinates

Expand Down Expand Up @@ -178,7 +182,7 @@ def _get_stacked_index(self, dim, source, request, bounds=None):
scales = np.array([self._get_scale(d, time_source, time_request) for d in udims])[None, :]
tol = np.linalg.norm((tols * scales).squeeze())
src_coords, req_coords_diag = _higher_precision_time_stack(source, request, udims)
ckdtree_source = cKDTree(src_coords * scales)
ckdtree_source = cKDTree(src_coords.T * scales)

# if the udims are all stacked in the same stack as part of the request coordinates, then we're done.
# Otherwise we have to evaluate each unstacked set of dimensions independently
Expand All @@ -187,9 +191,9 @@ def _get_stacked_index(self, dim, source, request, bounds=None):
stacked = {d for d in request.dims for ud in udims if ud in d and request.is_stacked(ud)}

if (len(indep_evals) + len(stacked)) <= 1: # output is stacked in the same way
req_coords = req_coords_diag
req_coords = req_coords_diag.T
elif (len(stacked) == 0) | (len(indep_evals) == 0 and len(stacked) == len(udims)):
req_coords = np.stack([i.ravel() for i in np.meshgrid(*req_coords_diag.T, indexing="ij")], axis=1)
req_coords = np.stack([i.ravel() for i in np.meshgrid(*req_coords_diag, indexing="ij")], axis=1)
else:
# Rare cases? E.g. lat_lon_time_alt source to lon, time_alt, lat destination
sizes = [request[d].size for d in request.dims]
Expand All @@ -199,7 +203,7 @@ def _get_stacked_index(self, dim, source, request, bounds=None):
ii = [ii for ii in range(len(request.dims)) if udims[i] in request.dims[ii]][0]
reshape[:] = 1
reshape[ii] = -1
coords[i] = req_coords_diag[:, i].reshape(*reshape)
coords[i] = req_coords_diag[i].reshape(*reshape)
for j, d in enumerate(request.dims):
if udims[i] in d: # Then we don't need to repeat
continue
Expand All @@ -211,7 +215,7 @@ def _get_stacked_index(self, dim, source, request, bounds=None):
if self.respect_bounds:
if bounds is None:
bounds = [src_coords.min(0), src_coords.max(0)]
index[np.any((req_coords > bounds[1])) | np.any((req_coords < bounds[0]))] = -1
index[np.any((req_coords > bounds[1]), axis=1) | np.any((req_coords < bounds[0]), axis=1)] = -1

if tol and tol != np.inf:
index[dist > tol] = -1
Expand Down
27 changes: 14 additions & 13 deletions podpac/core/interpolation/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@
def _higher_precision_time_stack(coords0, coords1, dims):
crds0 = []
crds1 = []
lens = []
for d in dims:
c0, c1 = _higher_precision_time_coords1d(coords0[d], coords1[d])
crds0.append(c0)
crds1.append(c1)
return np.stack(crds0, axis=1), np.stack(crds1, axis=1)
lens.append(len(c1))
if np.all(np.array(lens) == lens[0]):
crds1 = np.stack(crds1, axis=0)

return np.stack(crds0, axis=0), crds1


def _higher_precision_time_coords1d(coords0, coords1):
Expand Down Expand Up @@ -133,25 +138,21 @@ def select_nonuniform(self, source, request, index_type):

def select_stacked(self, source, request, index_type):
udims = [ud for ud in source.udims if ud in request.udims]
src_coords, req_coords_diag = _higher_precision_time_stack(source, request, udims)
ckdtree_source = cKDTree(src_coords)
_, inds = ckdtree_source.query(req_coords_diag, k=len(self.method))
inds = inds[inds < source.coordinates.size]
inds = inds.ravel()

if np.unique(inds).size == source.size:
return inds

if len(udims) == 1:
return inds

# if the udims are all stacked in the same stack as part of the request coordinates, then we're done.
# if the udims are all stacked in the same stack as part of the request coordinates, then we can take a shortcut.
# Otherwise we have to evaluate each unstacked set of dimensions independently
indep_evals = [ud for ud in udims if not request.is_stacked(ud)]
# two udims could be stacked, but in different dim groups, e.g. source (lat, lon), request (lat, time), (lon, alt)
stacked = {d for d in request.dims for ud in udims if ud in d and request.is_stacked(ud)}

inds = np.array([])
if (len(indep_evals) + len(stacked)) <= 1:
src_coords, req_coords_diag = _higher_precision_time_stack(source, request, udims)
if isinstance(req_coords_diag, np.ndarray): # The request is stacked, or square uniform coords
ckdtree_source = cKDTree(src_coords.T)
_, inds = ckdtree_source.query(req_coords_diag.T, k=len(self.method))
inds = inds[inds < source.coordinates.size]
inds = inds.ravel()
return inds

stacked_ud = [d for s in stacked for d in s.split("_") if d in udims]
Expand Down
29 changes: 29 additions & 0 deletions podpac/core/interpolation/test/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,32 @@ def test_point2uniform(self):
c, ci = selector.select(u_fine, p_coarse)
for cci, trth in zip(ci, np.ix_(self.nn_request_coarse_from_fine, self.nn_request_coarse_from_fine)):
np.testing.assert_array_equal(cci, trth)

def test_point2uniform_non_square(self):
u_fine = Coordinates([self.lat_fine, self.lon_fine[:-1]], ["lat", "lon"])
u_coarse = Coordinates([self.lat_coarse[:-1], self.lon_coarse], ["lat", "lon"])

p_fine = Coordinates([[self.lat_fine, self.lon_fine]], [["lat", "lon"]])
p_coarse = Coordinates([[self.lat_coarse, self.lon_coarse]], [["lat", "lon"]])

selector = Selector("nearest")

c, ci = selector.select(u_fine, p_coarse)
for cci, trth in zip(ci, np.ix_(self.nn_request_coarse_from_fine, self.nn_request_coarse_from_fine)):
np.testing.assert_array_equal(cci, trth)

c, ci = selector.select(u_coarse, p_fine)
for cci, trth in zip(ci, np.ix_(self.nn_request_fine_from_coarse[:-1], self.nn_request_fine_from_coarse)):
np.testing.assert_array_equal(cci, trth)

c, ci = selector.select(p_fine, u_coarse)
np.testing.assert_array_equal(ci, (self.nn_request_coarse_from_fine,))

c, ci = selector.select(p_coarse, u_fine)
np.testing.assert_array_equal(ci, (self.nn_request_fine_from_coarse,))

# Respect bounds
selector.respect_bounds = True
c, ci = selector.select(u_fine, p_coarse)
for cci, trth in zip(ci, np.ix_(self.nn_request_coarse_from_fine, self.nn_request_coarse_from_fine)):
np.testing.assert_array_equal(cci, trth)

0 comments on commit 2c5640c

Please sign in to comment.