Fix for JASCIS-320. The collocation routines now return masked arrays with the appropriate fill values (which now default to NaN rather than Inf). Updated some of the tests to reflect the change.
cedadev · Aug 5, 2016 · a0904f4 · a0904f4
1 parent 29fde07
commit a0904f4
Show file tree

Hide file tree

Showing 8 changed files with 59 additions and 73 deletions.
diff --git a/cis/collocation/col_framework.py b/cis/collocation/col_framework.py
@@ -14,7 +14,7 @@ def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
         """
         Initialise the fill_value, missing data flag and variable attributes.
 
-        :param fill_value: The value to use when the kernel is unable to return a value. The default is inf.
+        :param fill_value: The value to use when the kernel is unable to return a value. The default is NaN.
         :param var_name: The name of the variable to use when creating the output data object
         :param var_long_name: The long name of the variable to use when creating the output data object
         :param var_units: The units of the variable to use when creating the output data object
@@ -23,7 +23,7 @@ def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
         :return:
         """
         import numpy as np
-        self.fill_value = float(fill_value) if fill_value is not None else np.inf
+        self.fill_value = float(fill_value) if fill_value is not None else np.nan
         self.var_name = var_name
         self.var_long_name = var_long_name
         self.var_units = var_units

diff --git a/cis/collocation/col_implementations.py b/cis/collocation/col_implementations.py
@@ -23,15 +23,10 @@ class GeneralUngriddedCollocator(Collocator):
     Collocator for locating onto ungridded sample points
     """
 
-    def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
+    def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
                  missing_data_for_missing_sample=False):
         super(GeneralUngriddedCollocator, self).__init__()
-        if fill_value is not None:
-            try:
-                self.fill_value = float(fill_value)
-            except ValueError:
-                raise cis.exceptions.InvalidCommandLineOptionError(
-                    'Dummy Constraint fill_value must be a valid float')
+        self.fill_value = float(fill_value)
         self.var_name = var_name
         self.var_long_name = var_long_name
         self.var_units = var_units
@@ -92,7 +87,9 @@ def collocate(self, points, data, constraint, kernel):
                                                       self.var_standard_name, self.var_units)
 
         sample_points_count = len(sample_points)
-        values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value
+        # Create an empty masked array to store the collocated values. The elements will be unmasked by assignment.
+        values = np.ma.masked_all((len(var_set_details), sample_points_count))
+        values.fill_value = self.fill_value
         log_memory_profile("GeneralUngriddedCollocator after output array creation")
 
         logging.info("    {} sample points".format(sample_points_count))
@@ -151,15 +148,10 @@ class GriddedUngriddedCollocator(Collocator):
     Collocator for locating GriddedData onto ungridded sample points
     """
 
-    def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
+    def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
                  missing_data_for_missing_sample=False, extrapolate=False):
         super(GriddedUngriddedCollocator, self).__init__()
-        if fill_value is not None:
-            try:
-                self.fill_value = float(fill_value)
-            except ValueError:
-                raise cis.exceptions.InvalidCommandLineOptionError(
-                    'Dummy Constraint fill_value must be a valid float')
+        self.fill_value = float(fill_value)
         self.var_name = var_name
         self.var_long_name = var_long_name
         self.var_units = var_units
@@ -790,15 +782,10 @@ class GeneralGriddedCollocator(Collocator):
     """Performs collocation of data on to the points of a cube (ie onto a gridded dataset).
     """
 
-    def __init__(self, fill_value=None, var_name='', var_long_name='', var_units='',
+    def __init__(self, fill_value=np.nan, var_name='', var_long_name='', var_units='',
                  missing_data_for_missing_sample=False):
         super(GeneralGriddedCollocator, self).__init__()
-        if fill_value is not None:
-            try:
-                self.fill_value = float(fill_value)
-            except ValueError:
-                raise cis.exceptions.InvalidCommandLineOptionError(
-                    'Dummy Constraint fill_value must be a valid float')
+        self.fill_value = float(fill_value)
         self.var_name = var_name
         self.var_long_name = var_long_name
         self.var_units = var_units

diff --git a/cis/collocation/gridded_interpolation.py b/cis/collocation/gridded_interpolation.py
@@ -285,7 +285,7 @@ def __call__(self, values, fill_value=np.nan):
         result = self._interp(values, self.indices, self.norm_distances)
 
         if fill_value is not None:
-            result[self.out_of_bounds] = fill_value
+            result = np.ma.array(result, mask=self.out_of_bounds, fill_value=fill_value)
 
         return result
 

diff --git a/cis/test/unit/aggregation/test_aggregation.py b/cis/test/unit/aggregation/test_aggregation.py
@@ -545,9 +545,9 @@ def test_aggregating_single_point_in_one_dimension(self):
         agg = Aggregator(data, grid)
         cube_out = agg.aggregate_ungridded(self.kernel)
 
-        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
+        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_can_name_variables_by_standard_name(self):
@@ -561,9 +561,9 @@ def test_can_name_variables_by_standard_name(self):
         agg = Aggregator(data, grid)
         cube_out = agg.aggregate_ungridded(self.kernel)
 
-        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
+        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_aggregating_single_point_in_one_dimension_lower_bound_edge_case(self):
@@ -578,9 +578,9 @@ def test_aggregating_single_point_in_one_dimension_lower_bound_edge_case(self):
         agg = Aggregator(data, grid)
         cube_out = agg.aggregate_ungridded(self.kernel)
 
-        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('inf'))
+        result = numpy.ma.array([[0], [0], [1.0], [0], [0]], mask=[[1], [1], [0], [1], [1]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_aggregating_single_point_in_one_dimension_upper_bound_edge_case(self):
@@ -595,9 +595,9 @@ def test_aggregating_single_point_in_one_dimension_upper_bound_edge_case(self):
         agg = Aggregator(data, grid)
         cube_out = agg.aggregate_ungridded(self.kernel)
 
-        result = numpy.ma.array([[0], [0], [0.0], [1.0], [0]], mask=[[1], [1], [1], [0], [1]], fill_value=float('inf'))
+        result = numpy.ma.array([[0], [0], [0.0], [1.0], [0]], mask=[[1], [1], [1], [0], [1]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_aggregating_edge_cases(self):
@@ -619,7 +619,7 @@ def test_aggregating_edge_cases(self):
                               [10.0, 11.0]])  # 12.0],
         # [13.0, 14.0, 15.0]],
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_aggregating_simple_dataset_in_two_dimensions_with_missing_values(self):
@@ -639,9 +639,9 @@ def test_aggregating_simple_dataset_in_two_dimensions_with_missing_values(self):
                                       [0, 1, 0],
                                       [0, 0, 1],
                                       [0, 0, 0],
-                                      [1, 0, 0]], fill_value=float('inf'))
+                                      [1, 0, 0]], fill_value=float('nan'))
 
-        assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_mean_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
@@ -655,9 +655,9 @@ def test_mean_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
         result = numpy.ma.array([[2.5, 2.0, 4.5],
                                  [8.5, 11.0, 13.5]],
                                 mask=[[0, 0, 0],
-                                      [0, 0, 0]], fill_value=float('inf'))
+                                      [0, 0, 0]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_max_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
@@ -673,9 +673,9 @@ def test_max_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
         result = numpy.ma.array([[4.0, 2.0, 6.0],
                                  [10.0, 14.0, 15.0]],
                                 mask=[[0, 0, 0],
-                                      [0, 0, 0]], fill_value=float('inf'))
+                                      [0, 0, 0]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_min_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
@@ -691,9 +691,9 @@ def test_min_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
         result = numpy.ma.array([[1.0, 2.0, 3.0],
                                  [7.0, 8.0, 12.0]],
                                 mask=[[0, 0, 0],
-                                      [0, 0, 0]], fill_value=float('inf'))
+                                      [0, 0, 0]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     @istest
     def test_stddev_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
@@ -709,9 +709,9 @@ def test_stddev_kernel_with_dataset_in_two_dimensions_with_missing_values(self):
         result = numpy.ma.array([[numpy.sqrt(4.5), float('NaN'), numpy.sqrt(4.5)],
                                  [numpy.sqrt(4.5), 3.0, numpy.sqrt(4.5)]],
                                 mask=[[0, 1, 0],
-                                      [0, 0, 0]], fill_value=float('inf'))
+                                      [0, 0, 0]], fill_value=float('nan'))
 
-        assert_arrays_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
 
     def test_aggregation_one_dim_using_moments_kernel(self):
         self.kernel = moments()
@@ -763,14 +763,14 @@ def test_aggregating_on_grid_0_to_360_when_data_is_minus_180_to_180(self):
         grid = {'x': AggregationGrid(125, 270, 40, False)}
         agg = Aggregator(data, grid)
         output = agg.aggregate_ungridded(self.kernel)
-        assert_arrays_equal(output[0].data, [[13.5, 5.5, 6.5, 7.5]])
+        numpy.testing.assert_array_equal(output[0].data, [[13.5, 5.5, 6.5, 7.5]])
 
     def test_aggregating_on_grid_minus_180_to_180_when_data_is_0_to_360(self):
         data = make_regular_2d_ungridded_data(lat_dim_length=2, lon_dim_length=9, lon_min=5., lon_max=325.)
         grid = {'x': AggregationGrid(-75, 125, 40, False)}
         agg = Aggregator(data, grid)
         output = agg.aggregate_ungridded(self.kernel)
-        assert_arrays_equal(output[0].data, [[12.5, 13.5, 5.5, 6.5, 7.5]])
+        numpy.testing.assert_array_equal(output[0].data, [[12.5, 13.5, 5.5, 6.5, 7.5]])
 
     def test_collapsed_coords_get_output_as_length_1(self):
         data = make_regular_2d_ungridded_data()
@@ -786,7 +786,7 @@ def test_collapsed_coords_get_max_min_bounds(self):
         agg = Aggregator(data, grid)
         output = agg.aggregate_ungridded(self.kernel)
         lon = output.coord('longitude')
-        assert_arrays_equal(lon.bounds, [[-5, 5]])
+        numpy.testing.assert_array_equal(lon.bounds, [[-5, 5]])
 
     def test_aggregating_coord_to_length_one_with_explicit_bounds_gets_output_as_length_one(self):
         data = make_regular_2d_ungridded_data()
@@ -802,7 +802,7 @@ def test_aggregating_to_length_one_with_explicit_bounds_get_correct_bounds(self)
         agg = Aggregator(data, grid)
         output = agg.aggregate_ungridded(self.kernel)
         lon = output.coord('longitude')
-        assert_arrays_equal(lon.bounds, [[-180, 180]])
+        numpy.testing.assert_array_equal(lon.bounds, [[-180, 180]])
 
 
 class TestUngriddedListAggregation(TestCase):
@@ -828,12 +828,12 @@ def test_aggregating_list_of_datasets_over_two_dims(self):
                                       [0, 1, 0],
                                       [0, 0, 1],
                                       [0, 0, 0],
-                                      [1, 0, 0]], fill_value=float('inf'))
+                                      [1, 0, 0]], fill_value=float('nan'))
 
         print(cube_out[0].data.fill_value)
         assert len(cube_out) == 2
-        assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
-        assert numpy.array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result))
 
     def test_aggregation_one_dim_using_moments_kernel(self):
         self.kernel = moments()
@@ -923,7 +923,7 @@ def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
                                       [0, 1, 0],
                                       [0, 0, 1],
                                       [0, 0, 0],
-                                      [1, 0, 0]], fill_value=float('inf'))
+                                      [1, 0, 0]], fill_value=float('nan'))
 
         result_1 = numpy.ma.array([[1.0, 2.0, 3.0],
                                    [4.0, 5.0, 6.0],
@@ -934,9 +934,9 @@ def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
                                         [1, 1, 1],
                                         [1, 1, 1],
                                         [1, 1, 1],
-                                        [1, 1, 1]], fill_value=float('inf'))
+                                        [1, 1, 1]], fill_value=float('nan'))
 
         print(cube_out[0].data.fill_value)
         assert len(cube_out) == 2
-        assert numpy.array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result_1))
-        assert numpy.array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result_0))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[0].data), numpy.ma.filled(result_1))
+        numpy.testing.assert_array_equal(numpy.ma.filled(cube_out[1].data), numpy.ma.filled(result_0))
diff --git a/cis/test/unit/colocate/test_general_gridded_col.py b/cis/test/unit/colocate/test_general_gridded_col.py
@@ -520,13 +520,13 @@ def test_fill_value_for_cube_cell_constraint_default_fill_value(self):
 
         out_cube = col.collocate(points=sample_cube, data=data_point, constraint=con, kernel=SlowMean())[0]
 
-        expected_result = numpy.array([[float('Inf'), float('Inf'), float('Inf')],
-                                       [float('Inf'), float('Inf'), float('Inf')],
-                                       [float('Inf'), float('Inf'), float('Inf')],
-                                       [float('Inf'), float('Inf'), float('Inf')],
-                                       [float('Inf'), float('Inf'), float('Inf')]])
+        expected_result = numpy.array([[float('nan'), float('nan'), float('nan')],
+                                       [float('nan'), float('nan'), float('nan')],
+                                       [float('nan'), float('nan'), float('nan')],
+                                       [float('nan'), float('nan'), float('nan')],
+                                       [float('nan'), float('nan'), float('nan')]])
 
-        assert numpy.array_equal(out_cube.data.filled(), expected_result)
+        numpy.testing.assert_array_equal(out_cube.data.filled(), expected_result)
 
     def test_single_point_results_in_single_value_in_cell(self):
         con = CubeCellConstraint()

diff --git a/cis/test/unit/colocate/test_general_ungridded_col.py b/cis/test/unit/colocate/test_general_ungridded_col.py
@@ -72,18 +72,17 @@ def test_list_ungridded_ungridded_box_mean(self):
         output = col.collocate(sample_points, data_list, constraint, kernel)
 
         expected_result = np.array(list(range(1, 16)))
-        expected_stddev = np.array(15 * [float('inf')])
         expected_n = np.array(15 * [1])
         assert len(output) == 6
         assert isinstance(output, UngriddedDataList)
         assert output[3].var_name == 'snow'
         assert output[4].var_name == 'snow_std_dev'
         assert output[5].var_name == 'snow_num_points'
         assert np.allclose(output[0].data, expected_result)
-        assert np.allclose(output[1].data, expected_stddev)
+        assert all(output[1].data.mask)
         assert np.allclose(output[2].data, expected_n)
         assert np.allclose(output[3].data, expected_result + 3)
-        assert np.allclose(output[4].data, expected_stddev)
+        assert all(output[4].data.mask)
         assert np.allclose(output[5].data, expected_n)
 
     def test_list_gridded_ungridded_box_moments(self):

diff --git a/cis/test/unit/colocate/test_gridded_interpolation.py b/cis/test/unit/colocate/test_gridded_interpolation.py
@@ -178,10 +178,10 @@ def test_out_of_bounds_fill(self):
         sample = np.asarray([[-.1, -.1, -.1, -.1], [1.1, 1.1, 1.1, 1.1],
                              [2.1, 2.1, -1.1, -1.1]])
         interp = _RegularGridInterpolator(points, sample.T, method="nearest")
-        wanted = np.asarray([np.nan, np.nan, np.nan])
-        assert_array_almost_equal(interp(values, fill_value=np.nan), wanted)
+        # Assert that all of the elements are masked
+        assert all(interp(values, fill_value=np.nan).mask)
         interp = _RegularGridInterpolator(points, sample.T, method="linear")
-        assert_array_almost_equal(interp(values, fill_value=np.nan), wanted)
+        assert all(interp(values, fill_value=np.nan).mask)
 
         sample = np.asarray([[0.1, 0.1, 1., .9], [0.2, 0.1, .45, .8],
                              [0.5, 0.5, .5, .5]])