From 58eb9e786cc9452bdacb6118ef6ba89281c1e48c Mon Sep 17 00:00:00 2001
From: Michael Joyce
Date: Fri, 4 Apr 2014 19:54:40 -0700
Subject: [PATCH] CLIMATE-393 - Add dataset_processor.safe_subset

- Add safe_subset for gracefully handling subsetting when not all of the
  bounding parameters are fully contained in the target dataset. If any
  of the bounding values fall outside of the dataset's bounds they are
  defaulted to the dataset's maximum/minimum.
---
 ocw/dataset_processor.py            | 52 ++++++++++++++++++++++
 ocw/tests/test_dataset_processor.py | 81 ++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 72c01726..0d08699f 100644
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -187,6 +187,58 @@ def subset(subregion, target_dataset):
         target_dataset.name
     )
 
+def safe_subset(subregion, target_dataset):
+    '''Safely subset given dataset with subregion information
+
+    A standard subset requires that the provided subregion be entirely
+    contained within the dataset's bounds. `safe_subset` instead clamps any
+    subregion value that lies outside of the dataset's bounds to the dataset's
+    corresponding minimum/maximum and then performs a standard subset.
+
+    The clamping is applied to a shallow copy, so the caller's Bounds object
+    is not modified and can be reused with other datasets.
+
+    :param subregion: The Bounds with which to subset the target Dataset.
+    :type subregion: ocw.dataset.Bounds
+    :param target_dataset: The Dataset object to subset.
+    :type target_dataset: ocw.dataset.Dataset
+
+    :returns: The subset-ed Dataset object
+    :rtype: Dataset
+    '''
+    # Imported locally so that this change does not touch the module's import
+    # block.
+    import copy
+
+    lat_min, lat_max, lon_min, lon_max = target_dataset.spatial_boundaries()
+    start, end = target_dataset.time_range()
+
+    # Clamp a copy rather than the argument: assigning to the caller's Bounds
+    # would silently shrink it for every later use of the same object.
+    # NOTE(review): a shallow copy assumes Bounds stores its limits as plain
+    # instance attributes -- confirm against ocw.dataset.Bounds.
+    clamped = copy.copy(subregion)
+
+    if clamped.lat_min < lat_min:
+        clamped.lat_min = lat_min
+
+    if clamped.lat_max > lat_max:
+        clamped.lat_max = lat_max
+
+    if clamped.lon_min < lon_min:
+        clamped.lon_min = lon_min
+
+    if clamped.lon_max > lon_max:
+        clamped.lon_max = lon_max
+
+    if clamped.start < start:
+        clamped.start = start
+
+    if clamped.end > end:
+        clamped.end = end
+
+    return subset(clamped, target_dataset)
+
 def normalize_dataset_datetimes(dataset, timestep):
     ''' Normalize Dataset datetime values.
 
diff --git a/ocw/tests/test_dataset_processor.py b/ocw/tests/test_dataset_processor.py
index f883ad98..86f33279 100644
--- a/ocw/tests/test_dataset_processor.py
+++ b/ocw/tests/test_dataset_processor.py
@@ -228,6 +228,87 @@ def test_subset_using_non_exact_temporal_bounds(self):
             "time_end" : 49}
         self.assertDictEqual(index_slices, control_index_slices)
 
+class TestSafeSubset(unittest.TestCase):
+    def setUp(self):
+        lats = np.array(range(-60, 61, 1))
+        lons = np.array(range(-170, 171, 1))
+        times = np.array([datetime.datetime(year, month, 1)
+                          for year in range(2000, 2010)
+                          for month in range(1, 13)])
+        values = np.ones([len(times), len(lats), len(lons)])
+        self.target_dataset = ds.Dataset(lats,
+                                         lons,
+                                         times,
+                                         values,
+                                         variable="test variable name",
+                                         name='foo')
+
+        # NOTE(review): if the first pair is the latitude range, +/-165 lies
+        # outside [-90, 90]; confirm that ds.Bounds accepts such values.
+        self.spatial_out_of_bounds = ds.Bounds(
+            -165, 165,
+            -180, 180,
+            datetime.datetime(2001, 1, 1),
+            datetime.datetime(2004, 1, 1)
+        )
+
+        self.temporal_out_of_bounds = ds.Bounds(
+            -40, 40,
+            -160.25, 160.5,
+            datetime.datetime(1999, 1, 15),
+            datetime.datetime(2222, 2, 15)
+        )
+
+        self.everything_out_of_bounds = ds.Bounds(
+            -165, 165,
+            -180, 180,
+            datetime.datetime(1999, 1, 15),
+            datetime.datetime(2222, 2, 15)
+        )
+
+    def test_partial_spatial_overlap(self):
+        '''Ensure that safe_subset can handle out of bounds spatial values'''
+        subset = dp.safe_subset(self.spatial_out_of_bounds, self.target_dataset)
+        spatial_bounds = subset.spatial_boundaries()
+        self.assertEqual(spatial_bounds[0], -60)
+        self.assertEqual(spatial_bounds[1], 60)
+        self.assertEqual(spatial_bounds[2], -170)
+        self.assertEqual(spatial_bounds[3], 170)
+
+    def test_partial_temporal_overlap(self):
+        '''Ensure that safe_subset can handle out of bounds temporal values'''
+        subset = dp.safe_subset(self.temporal_out_of_bounds, self.target_dataset)
+        temporal_bounds = subset.time_range()
+        start = datetime.datetime(2000, 1, 1)
+        end = datetime.datetime(2009, 12, 1)
+
+        self.assertEqual(temporal_bounds[0], start)
+        self.assertEqual(temporal_bounds[1], end)
+
+    def test_entire_bounds_overlap(self):
+        '''Ensure that safe_subset can handle every bound being out of range'''
+        subset = dp.safe_subset(self.everything_out_of_bounds, self.target_dataset)
+        spatial_bounds = subset.spatial_boundaries()
+        temporal_bounds = subset.time_range()
+        start = datetime.datetime(2000, 1, 1)
+        end = datetime.datetime(2009, 12, 1)
+
+        self.assertEqual(spatial_bounds[0], -60)
+        self.assertEqual(spatial_bounds[1], 60)
+        self.assertEqual(spatial_bounds[2], -170)
+        self.assertEqual(spatial_bounds[3], 170)
+        self.assertEqual(temporal_bounds[0], start)
+        self.assertEqual(temporal_bounds[1], end)
+
+    def test_subregion_is_not_modified(self):
+        '''Ensure that safe_subset leaves the caller's Bounds untouched'''
+        dp.safe_subset(self.temporal_out_of_bounds, self.target_dataset)
+
+        self.assertEqual(self.temporal_out_of_bounds.start,
+                         datetime.datetime(1999, 1, 15))
+        self.assertEqual(self.temporal_out_of_bounds.end,
+                         datetime.datetime(2222, 2, 15))
+
 class TestFailingSubset(unittest.TestCase):
     def setUp(self):
         self.target_dataset = ten_year_monthly_dataset()