From 97269f40ebf30c4413a0300a0ec743e507fd2737 Mon Sep 17 00:00:00 2001 From: Michael Joyce Date: Sat, 29 Mar 2014 16:31:02 -0700 Subject: [PATCH] CLIMATE-386 - Add functionality to output a dataset as NetCDF - Add dataset_processor.write_netcdf for outputting a Dataset object as a NetCDF4 file. - Add unit tests for write_netcdf functionality. --- ocw/dataset_processor.py | 48 +++++++++++++++++++++++++++++ ocw/tests/test_dataset_processor.py | 25 +++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index e9adba2a..72c01726 100644 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -23,6 +23,7 @@ import scipy.interpolate import scipy.ndimage from scipy.ndimage import map_coordinates +import netCDF4 import logging @@ -210,6 +211,53 @@ def normalize_dataset_datetimes(dataset, timestep): dataset.name ) +def write_netcdf(dataset, path, compress=True): + ''' Write a dataset to a NetCDF file. + + :param dataset: The dataset to write. + :type dataset: ocw.dataset.Dataset + + :param path: The output file path. 
+ :type path: string + ''' + out_file = netCDF4.Dataset(path, 'w', format='NETCDF4') + + # Set attribute lengths + lat_len = len(dataset.lats) + lon_len = len(dataset.lons) + time_len = len(dataset.times) + + # Create attribute dimensions + lat_dim = out_file.createDimension('lat', lat_len) + lon_dim = out_file.createDimension('lon', lon_len) + time_dim = out_file.createDimension('time', time_len) + + # Create variables + lats = out_file.createVariable('lat', 'f8', ('lat',), zlib=compress) + lons = out_file.createVariable('lon', 'f8', ('lon',), zlib=compress) + times = out_file.createVariable('time', 'f8', ('time',), zlib=compress) + + var_name = dataset.variable if dataset.variable else 'var' + values = out_file.createVariable(var_name, + 'f8', + ('time', 'lat', 'lon'), + zlib=compress) + + # Set the time variable units + # We don't deal with hourly/minutely/anything-less-than-a-day data so + # we can safely stick with a 'days since' offset here. Note that the + # NetCDF4 helper date2num doesn't support 'months' or 'years' instead + # of days. + times.units = "days since %s" % dataset.times[0] + + # Store the dataset's values + lats[:] = dataset.lats + lons[:] = dataset.lons + times[:] = netCDF4.date2num(dataset.times, times.units) + values[:] = dataset.values + + out_file.close() + def _rcmes_normalize_datetimes(datetimes, timestep): """ Normalize Dataset datetime values. 
diff --git a/ocw/tests/test_dataset_processor.py b/ocw/tests/test_dataset_processor.py index a144d4ad..f883ad98 100644 --- a/ocw/tests/test_dataset_processor.py +++ b/ocw/tests/test_dataset_processor.py @@ -17,8 +17,11 @@ import unittest import datetime +import os + from ocw import dataset_processor as dp from ocw import dataset as ds +from ocw.data_source import local import numpy as np import numpy.ma as ma @@ -268,6 +271,28 @@ def test_out_of_dataset_bounds_end(self): with self.assertRaises(ValueError): dp.subset(self.subregion, self.target_dataset) +class TestNetCDFWrite(unittest.TestCase): + def setUp(self): + self.ds = ten_year_monthly_dataset() + self.file_name = 'test.nc' + + def tearDown(self): + if os.path.isfile(self.file_name): + os.remove(self.file_name) + + def test_file_write(self): + dp.write_netcdf(self.ds, self.file_name) + self.assertTrue(os.path.isfile(self.file_name)) + + def test_that_file_contents_are_valid(self): + dp.write_netcdf(self.ds, self.file_name) + new_ds = local.load_file(self.file_name, self.ds.variable) + + self.assertEqual(self.ds.variable, new_ds.variable) + self.assertTrue(np.array_equal(self.ds.lats, new_ds.lats)) + self.assertTrue(np.array_equal(self.ds.lons, new_ds.lons)) + self.assertTrue(np.array_equal(self.ds.times, new_ds.times)) + self.assertTrue(np.array_equal(self.ds.values, new_ds.values)) def ten_year_monthly_dataset(): lats = np.array(range(-89, 90, 2))