Skip to content

Commit

Permalink
Fix for JASCIS-320. The collocation routines now return masked arrays…
Browse files (browse the repository at this point in the history)
… with the appropriate fill values (which now default to NaN rather than Inf). Updated some of the tests to reflect the change.
  • Loading branch information
duncanwp committed Aug 5, 2016
1 parent 29fde07 commit a0904f4
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 73 deletions.
4 changes: 2 additions & 2 deletions cis/collocation/col_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
"""
Initialise the fill_value, missing data flag and variable attributes.
:param fill_value: The value to use when the kernel is unable to return a value. The default is inf.
:param fill_value: The value to use when the kernel is unable to return a value. The default is NaN.
:param var_name: The name of the variable to use when creating the output data object
:param var_long_name: The long name of the variable to use when creating the output data object
:param var_units: The units of the variable to use when creating the output data object
Expand All @@ -23,7 +23,7 @@ def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
:return:
"""
import numpy as np
self.fill_value = float(fill_value) if fill_value is not None else np.inf
self.fill_value = float(fill_value) if fill_value is not None else np.nan
self.var_name = var_name
self.var_long_name = var_long_name
self.var_units = var_units
Expand Down
31 changes: 9 additions & 22 deletions cis/collocation/col_implementations.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,10 @@ class GeneralUngriddedCollocator(Collocator):
Collocator for locating onto ungridded sample points
"""

def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
missing_data_for_missing_sample=False):
super(GeneralUngriddedCollocator, self).__init__()
if fill_value is not None:
try:
self.fill_value = float(fill_value)
except ValueError:
raise cis.exceptions.InvalidCommandLineOptionError(
'Dummy Constraint fill_value must be a valid float')
self.fill_value = float(fill_value)
self.var_name = var_name
self.var_long_name = var_long_name
self.var_units = var_units
Expand Down Expand Up @@ -92,7 +87,9 @@ def collocate(self, points, data, constraint, kernel):
self.var_standard_name, self.var_units)

sample_points_count = len(sample_points)
values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value
# Create an empty masked array to store the collocated values. The elements will be unmasked by assignment.
values = np.ma.masked_all((len(var_set_details), sample_points_count))
values.fill_value = self.fill_value
log_memory_profile("GeneralUngriddedCollocator after output array creation")

logging.info(" {} sample points".format(sample_points_count))
Expand Down Expand Up @@ -151,15 +148,10 @@ class GriddedUngriddedCollocator(Collocator):
Collocator for locating GriddedData onto ungridded sample points
"""

def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
missing_data_for_missing_sample=False, extrapolate=False):
super(GriddedUngriddedCollocator, self).__init__()
if fill_value is not None:
try:
self.fill_value = float(fill_value)
except ValueError:
raise cis.exceptions.InvalidCommandLineOptionError(
'Dummy Constraint fill_value must be a valid float')
self.fill_value = float(fill_value)
self.var_name = var_name
self.var_long_name = var_long_name
self.var_units = var_units
Expand Down Expand Up @@ -790,15 +782,10 @@ class GeneralGriddedCollocator(Collocator):
"""Performs collocation of data on to the points of a cube (ie onto a gridded dataset).
"""

def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
missing_data_for_missing_sample=False):
super(GeneralGriddedCollocator, self).__init__()
if fill_value is not None:
try:
self.fill_value = float(fill_value)
except ValueError:
raise cis.exceptions.InvalidCommandLineOptionError(
'Dummy Constraint fill_value must be a valid float')
self.fill_value = float(fill_value)
self.var_name = var_name
self.var_long_name = var_long_name
self.var_units = var_units
Expand Down
2 changes: 1 addition & 1 deletion cis/collocation/gridded_interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def __call__(self, values, fill_value=np.nan):
result = self._interp(values, self.indices, self.norm_distances)

if fill_value is not None:
result[self.out_of_bounds] = fill_value
result = np.ma.array(result, mask=self.out_of_bounds, fill_value=fill_value)

return result

Expand Down
60 changes: 30 additions & 30 deletions cis/test/unit/aggregation/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,9 @@ def test_aggregating_single_point_in_one_dimension(self):
agg = Aggregator(data, grid)
cube_out = agg.aggregate_ungridded(self.kernel)

result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_can_name_variables_by_standard_name(self):
Expand All @@ -561,9 +561,9 @@ def test_can_name_variables_by_standard_name(self):
agg = Aggregator(data, grid)
cube_out = agg.aggregate_ungridded(self.kernel)

result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_aggregating_single_point_in_one_dimension_lower_bound_edge_case(self):
Expand All @@ -578,9 +578,9 @@ def test_aggregating_single_point_in_one_dimension_lower_bound_edge_case(self):
agg = Aggregator(data, grid)
cube_out = agg.aggregate_ungridded(self.kernel)

result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_aggregating_single_point_in_one_dimension_upper_bound_edge_case(self):
Expand All @@ -595,9 +595,9 @@ def test_aggregating_single_point_in_one_dimension_upper_bound_edge_case(self):
agg = Aggregator(data, grid)
cube_out = agg.aggregate_ungridded(self.kernel)

result = numpy.ma.array([[0], [0], [0.0], [1.0], [0]], mask=[[1], [1], [1], [0], [1]], fill_value=float('inf'))
result = numpy.ma.array([[0], [0], [0.0], [1.0], [0]], mask=[[1], [1], [1], [0], [1]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_aggregating_edge_cases(self):
Expand All @@ -619,7 +619,7 @@ def test_aggregating_edge_cases(self):
[10.0, 11.0]]) # 12.0],
# [13.0, 14.0, 15.0]],

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_aggregating_simple_dataset_in_two_dimensions_with_missing_values(self):
Expand All @@ -639,9 +639,9 @@ def test_aggregating_simple_dataset_in_two_dimensions_with_missing_values(self):
[0, 1, 0],
[0, 0, 1],
[0, 0, 0],
[1, 0, 0]], fill_value=float('inf'))
[1, 0, 0]], fill_value=float('nan'))

assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_mean_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
Expand All @@ -655,9 +655,9 @@ def test_mean_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
result = numpy.ma.array([[2.5, 2.0, 4.5],
[8.5, 11.0, 13.5]],
mask=[[0, 0, 0],
[0, 0, 0]], fill_value=float('inf'))
[0, 0, 0]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_max_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
Expand All @@ -673,9 +673,9 @@ def test_max_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
result = numpy.ma.array([[4.0, 2.0, 6.0],
[10.0, 14.0, 15.0]],
mask=[[0, 0, 0],
[0, 0, 0]], fill_value=float('inf'))
[0, 0, 0]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_min_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
Expand All @@ -691,9 +691,9 @@ def test_min_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
result = numpy.ma.array([[1.0, 2.0, 3.0],
[7.0, 8.0, 12.0]],
mask=[[0, 0, 0],
[0, 0, 0]], fill_value=float('inf'))
[0, 0, 0]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

@istest
def test_stddev_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
Expand All @@ -709,9 +709,9 @@ def test_stddev_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
result = numpy.ma.array([[numpy.sqrt(4.5), float('NaN'), numpy.sqrt(4.5)],
[numpy.sqrt(4.5), 3.0, numpy.sqrt(4.5)]],
mask=[[0, 1, 0],
[0, 0, 0]], fill_value=float('inf'))
[0, 0, 0]], fill_value=float('nan'))

assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))

def test_aggregation_one_dim_using_moments_kernel(self):
self.kernel = moments()
Expand Down Expand Up @@ -763,14 +763,14 @@ def test_aggregating_on_grid_0_to_360_when_data_is_minus_180_to_180(self):
grid = {'x': AggregationGrid(125, 270, 40, False)}
agg = Aggregator(data, grid)
output = agg.aggregate_ungridded(self.kernel)
assert_arrays_equal(output[0].data, [[13.5, 5.5, 6.5, 7.5]])
numpy.testing.assert_array_equal(output[0].data, [[13.5, 5.5, 6.5, 7.5]])

def test_aggregating_on_grid_minus_180_to_180_when_data_is_0_to_360(self):
data = make_regular_2d_ungridded_data(lat_dim_length=2, lon_dim_length=9, lon_min=5., lon_max=325.)
grid = {'x': AggregationGrid(-75, 125, 40, False)}
agg = Aggregator(data, grid)
output = agg.aggregate_ungridded(self.kernel)
assert_arrays_equal(output[0].data, [[12.5, 13.5, 5.5, 6.5, 7.5]])
numpy.testing.assert_array_equal(output[0].data, [[12.5, 13.5, 5.5, 6.5, 7.5]])

def test_collapsed_coords_get_output_as_length_1(self):
data = make_regular_2d_ungridded_data()
Expand All @@ -786,7 +786,7 @@ def test_collapsed_coords_get_max_min_bounds(self):
agg = Aggregator(data, grid)
output = agg.aggregate_ungridded(self.kernel)
lon = output.coord('longitude')
assert_arrays_equal(lon.bounds, [[-5, 5]])
numpy.testing.assert_array_equal(lon.bounds, [[-5, 5]])

def test_aggregating_coord_to_length_one_with_explicit_bounds_gets_output_as_length_one(self):
data = make_regular_2d_ungridded_data()
Expand All @@ -802,7 +802,7 @@ def test_aggregating_to_length_one_with_explicit_bounds_get_correct_bounds(self)
agg = Aggregator(data, grid)
output = agg.aggregate_ungridded(self.kernel)
lon = output.coord('longitude')
assert_arrays_equal(lon.bounds, [[-180, 180]])
numpy.testing.assert_array_equal(lon.bounds, [[-180, 180]])


class TestUngriddedListAggregation(TestCase):
Expand All @@ -828,12 +828,12 @@ def test_aggregating_list_of_datasets_over_two_dims(self):
[0, 1, 0],
[0, 0, 1],
[0, 0, 0],
[1, 0, 0]], fill_value=float('inf'))
[1, 0, 0]], fill_value=float('nan'))

print(cube_out[0].data.fill_value)
assert len(cube_out) == 2
assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
assert numpy.array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result))

def test_aggregation_one_dim_using_moments_kernel(self):
self.kernel = moments()
Expand Down Expand Up @@ -923,7 +923,7 @@ def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
[0, 1, 0],
[0, 0, 1],
[0, 0, 0],
[1, 0, 0]], fill_value=float('inf'))
[1, 0, 0]], fill_value=float('nan'))

result_1 = numpy.ma.array([[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
Expand All @@ -934,9 +934,9 @@ def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]], fill_value=float('inf'))
[1, 1, 1]], fill_value=float('nan'))

print(cube_out[0].data.fill_value)
assert len(cube_out) == 2
assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result_1))
assert numpy.array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result_0))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result_1))
numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result_0))
12 changes: 6 additions & 6 deletions cis/test/unit/colocate/test_general_gridded_col.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,13 +520,13 @@ def test_fill_value_for_cube_cell_constraint_default_fill_value(self):

out_cube = col.collocate(points=sample_cube, data=data_point, constraint=con, kernel=SlowMean())[0]

expected_result = numpy.array([[float('Inf'), float('Inf'), float('Inf')],
[float('Inf'), float('Inf'), float('Inf')],
[float('Inf'), float('Inf'), float('Inf')],
[float('Inf'), float('Inf'), float('Inf')],
[float('Inf'), float('Inf'), float('Inf')]])
expected_result = numpy.array([[float('nan'), float('nan'), float('nan')],
[float('nan'), float('nan'), float('nan')],
[float('nan'), float('nan'), float('nan')],
[float('nan'), float('nan'), float('nan')],
[float('nan'), float('nan'), float('nan')]])

assert numpy.array_equal(out_cube.data.filled(), expected_result)
numpy.testing.assert_array_equal(out_cube.data.filled(), expected_result)

def test_single_point_results_in_single_value_in_cell(self):
con = CubeCellConstraint()
Expand Down
5 changes: 2 additions & 3 deletions cis/test/unit/colocate/test_general_ungridded_col.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,17 @@ def test_list_ungridded_ungridded_box_mean(self):
output = col.collocate(sample_points, data_list, constraint, kernel)

expected_result = np.array(list(range(1, 16)))
expected_stddev = np.array(15 * [float('inf')])
expected_n = np.array(15 * [1])
assert len(output) == 6
assert isinstance(output, UngriddedDataList)
assert output[3].var_name == 'snow'
assert output[4].var_name == 'snow_std_dev'
assert output[5].var_name == 'snow_num_points'
assert np.allclose(output[0].data, expected_result)
assert np.allclose(output[1].data, expected_stddev)
assert all(output[1].data.mask)
assert np.allclose(output[2].data, expected_n)
assert np.allclose(output[3].data, expected_result + 3)
assert np.allclose(output[4].data, expected_stddev)
assert all(output[4].data.mask)
assert np.allclose(output[5].data, expected_n)

def test_list_gridded_ungridded_box_moments(self):
Expand Down
6 changes: 3 additions & 3 deletions cis/test/unit/colocate/test_gridded_interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ def test_out_of_bounds_fill(self):
sample = np.asarray([[-.1, -.1, -.1, -.1], [1.1, 1.1, 1.1, 1.1],
[2.1, 2.1, -1.1, -1.1]])
interp = _RegularGridInterpolator(points, sample.T, method="nearest")
wanted = np.asarray([np.nan, np.nan, np.nan])
assert_array_almost_equal(interp(values, fill_value=np.nan), wanted)
# Assert that all of the elements are masked
assert all(interp(values, fill_value=np.nan).mask)
interp = _RegularGridInterpolator(points, sample.T, method="linear")
assert_array_almost_equal(interp(values, fill_value=np.nan), wanted)
assert all(interp(values, fill_value=np.nan).mask)

sample = np.asarray([[0.1, 0.1, 1., .9], [0.2, 0.1, .45, .8],
[0.5, 0.5, .5, .5]])
Expand Down

0 comments on commit a0904f4

Please sign in to comment.