From 7049d3893f8748691e9d3c289ad722c3dda757d9 Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Mon, 23 Feb 2015 08:06:54 -0500 Subject: [PATCH 1/5] CLIMATE-585 Fixed the bias calculation in Bias, TemporalMeanBias, and SpatialMeanOfTemporalMeanBias --- ocw/metrics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocw/metrics.py b/ocw/metrics.py index 855d4e38..47c20ded 100644 --- a/ocw/metrics.py +++ b/ocw/metrics.py @@ -86,7 +86,7 @@ def run(self, ref_dataset, target_dataset): :returns: The difference between the reference and target datasets. :rtype: :class:`numpy.ndarray` ''' - return ref_dataset.values - target_dataset.values + return target_dataset.values - ref_dataset.values class TemporalStdDev(UnaryMetric): @@ -210,7 +210,7 @@ def run(self, ref_dataset, target_dataset, absolute=False): :returns: The mean bias between a reference and target dataset over time. ''' - diff = ref_dataset.values - target_dataset.values + diff = target_dataset.values - ref_dataset.values if absolute: diff = abs(diff) mean_bias = diff.mean(axis=0) @@ -238,7 +238,7 @@ def run(self, reference_dataset, target_dataset): :returns: The bias averaged over time and domain ''' - bias = reference_dataset.values - target_dataset.values + bias = target_dataset.values - reference_dataset.values return bias.mean() From 8341e5f4ffe513f4d2ce28a96910e41995f5d840 Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Wed, 25 Feb 2015 17:37:02 -0800 Subject: [PATCH 2/5] added units instance attribute to class Dataset & updated functions accordingly --- ocw/dataset.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ocw/dataset.py b/ocw/dataset.py index 1d4b2d87..7eb6543c 100644 --- a/ocw/dataset.py +++ b/ocw/dataset.py @@ -35,7 +35,7 @@ class Dataset: '''Container for a dataset's attributes and data.''' - def __init__(self, lats, lons, times, values, variable=None, name=""): + def __init__(self, lats, lons, times, values, variable=None, units=None, 
name=""): '''Default Dataset constructor :param lats: One dimensional numpy array of unique latitude values. @@ -55,6 +55,9 @@ def __init__(self, lats, lons, times, values, variable=None, name=""): :param variable: Name of the value variable. :type variable: :mod:`string` + :param units: Name of the value units + :type units: :mod:`string` + :param name: An optional string name for the Dataset. :type name: :mod:`string` @@ -68,6 +71,7 @@ def __init__(self, lats, lons, times, values, variable=None, name=""): self.times = times self.values = values self.variable = variable + self.units = units self.name = name def spatial_boundaries(self): @@ -198,6 +202,7 @@ def __str__(self): "lon-range: {}, " "time_range: {}, " "var: {}>" + "units: {}>" ) return formatted_repr.format( @@ -205,7 +210,8 @@ def __str__(self): lat_range, lon_range, time_range, - self.variable + self.variable, + self.units ) From c6b9ff8a79263afe5a54c44d252266eb92eab6a3 Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Wed, 25 Feb 2015 17:47:59 -0800 Subject: [PATCH 3/5] update data_source/local.py to store units from datasets --- ocw/data_source/local.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index c6405a93..299d92de 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -111,6 +111,7 @@ def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var): def load_file(file_path, variable_name, + variable_unit=None, elevation_index=0, name='', lat_name=None, @@ -124,6 +125,9 @@ def load_file(file_path, :param variable_name: The variable name to load from the NetCDF file. :type variable_name: :mod:`string` + :param variable_unit: The variable unit to load from the NetCDF file. + :type variable_unit: :mod:`string` + :param elevation_index: (Optional) The elevation index for which data should be returned. Climate data is often times 4 dimensional data. 
Some datasets will have readins at different height/elevation levels. OCW @@ -182,6 +186,7 @@ def load_file(file_path, times = utils.decode_time_values(netcdf, time_name) times = numpy.array(times) values = ma.array(netcdf.variables[variable_name][:]) + variable_unit = netcdf.variables[variable_name].units # If the values are 4D then we need to strip out the elevation index if len(values.shape) == 4: @@ -206,4 +211,4 @@ def load_file(file_path, else: values = values [:,:,:,elevation_index] - return Dataset(lats, lons, times, values, variable_name, name=name) + return Dataset(lats, lons, times, values, variable_name, variable_unit, name=name) From c5c598d4188ef6221c5b0705c86c86ddb9404e92 Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Wed, 25 Feb 2015 17:51:00 -0800 Subject: [PATCH 4/5] CLIMATE-591 - update data_source/rcmed.py to store units from the database --- ocw/data_source/rcmed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py index d1840789..63b4b437 100644 --- a/ocw/data_source/rcmed.py +++ b/ocw/data_source/rcmed.py @@ -347,7 +347,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l ''' parameters_metadata = get_parameters_metadata() - parameter_name, time_step, _, _, _, _, _= _get_parameter_info(parameters_metadata, parameter_id) + parameter_name, time_step, _, _, _, _, parameter_units = _get_parameter_info(parameters_metadata, parameter_id) url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step) lats, lons, times, values = _get_data(url) @@ -356,4 +356,4 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l values = _reshape_values(values, unique_lats_lons_times) values = _make_mask_array(values, parameter_id, parameters_metadata) - return Dataset(unique_lats_lons_times[0], unique_lats_lons_times[1], unique_times, values, parameter_name, name=name) + 
return Dataset(unique_lats_lons_times[0], unique_lats_lons_times[1], unique_times, values, parameter_name, parameter_units, name=name) From b4f38ef6b74d50a8027bf067fed49c130b8f892c Mon Sep 17 00:00:00 2001 From: Kim Whitehall Date: Wed, 25 Feb 2015 17:54:00 -0800 Subject: [PATCH 5/5] CLIMATE-592 - update dataset_processor.py to accommodate units in Dataset object --- ocw/dataset_processor.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 37296f26..c4ddc56d 100644 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -62,6 +62,7 @@ def temporal_rebin(target_dataset, temporal_resolution): binned_dates, binned_values, target_dataset.variable, + target_dataset.units, target_dataset.name) return new_dataset @@ -117,6 +118,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes): target_dataset.times, new_values, target_dataset.variable, + target_dataset.units, target_dataset.name) return regridded_dataset @@ -140,6 +142,7 @@ def ensemble(datasets): datasets[0].lons, datasets[0].times, ensemble_values, + datasets[0].units, name="Dataset Ensemble") return ensemble_dataset @@ -182,6 +185,7 @@ def subset(subregion, target_dataset): dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1, dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1], target_dataset.variable, + target_dataset.units, target_dataset.name ) @@ -248,6 +252,7 @@ def normalize_dataset_datetimes(dataset, timestep): np.array(new_times), dataset.values, dataset.variable, + dataset.units, dataset.name ) @@ -298,6 +303,33 @@ def write_netcdf(dataset, path, compress=True): out_file.close() +def unit_conversion(dataset): + ''' convert the units of model water flux variables (precipitation, evaporation, runoff) into "mm/day" as necessary + refactored from do_data_prep.py + + :param dataset: The dataset to convert. 
+ :type dataset: :class:`dataset.Dataset` + + :returns: dataset with new units + :rtype: :class:`dataset.Dataset` + + ''' + if ('pr' in dataset.variable) or ('precip' in dataset.variable) or ('evspsbl' in dataset.variable) or ('mrro' in dataset.variable)or ('mrros'in dataset.variable): + if ('KG M-2 S-1' in dataset.units) or ('kg m-2 s-1' in dataset.units) or ('MM S-1' in dataset.units) or ('mm s-1' in dataset.units) or ('mm/sec' in dataset.units): + dataset.values = 86400. * dataset.values + dataset.units = 'mm/day' + else: + pass + elif ('SWE' in dataset.variable) or ('swe' in dataset.variable): + if (dataset.units=='m') or (dataset.units=='M') or (dataset.units=='meter') or (dataset.units=='METER'): + dataset.values = 1.e3 * dataset.values + dataset.units = 'km' + else: + pass + + return dataset + + def _rcmes_normalize_datetimes(datetimes, timestep): """ Normalize Dataset datetime values.