Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions aodntools/timeseries_products/aggregated_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pkg_resources import resource_filename

from aodntools import __version__
from aodntools.timeseries_products.common import NoInputFilesError, check_file
from aodntools.timeseries_products.common import NoInputFilesError, check_file, in_water

TEMPLATE_JSON = resource_filename(__name__, 'aggregated_timeseries_template.json')

Expand Down Expand Up @@ -71,18 +71,6 @@ def get_instrument_id(nc):
return '; '.join([deployment_code, instrument, instrument_serial_number])


def in_water(nc):
    """
    Cut the dataset down to in-water timestamps only, dropping the out-of-water records.
    The in-water period is defined by the global attributes time_deployment_start and
    time_deployment_end.

    :param nc: xarray dataset
    :return: xarray dataset restricted to the in-water period
    """
    # [:-1] strips the trailing "Z" (UTC designator) so np.datetime64 accepts the string
    time_deployment_start = np.datetime64(nc.attrs['time_deployment_start'][:-1])
    time_deployment_end = np.datetime64(nc.attrs['time_deployment_end'][:-1])
    TIME = nc['TIME'][:]
    return nc.where((TIME >= time_deployment_start) & (TIME <= time_deployment_end), drop=True)


def get_nominal_depth(nc):
"""
return nominal depth from the NOMINAL_DEPTH variable, or
Expand Down
33 changes: 32 additions & 1 deletion aodntools/timeseries_products/common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Code shared by all timeseries product generating code"""
import numpy as np


class NoInputFilesError(Exception):
Expand Down Expand Up @@ -66,7 +67,8 @@ def check_file(nc, site_code, variables_of_interest,
* At least one variable of interest is present
* All variables of interest have only the allowed dimensions (TIME, LATITUDE, LONGITUDE)
* If LATITUDE or LONGITUDE are dimensions, they have length 1
* Global attributes time_deployment_start and time_deployment_end exist
* Global attributes time_deployment_start and time_deployment_end exist, and there is at least one
value of the TIME variable that falls within the range they define.
* QC flag variables use the IMOS flag conventions

:param nc: open xarray dataset
Expand Down Expand Up @@ -116,8 +118,14 @@ def check_file(nc, site_code, variables_of_interest,
error_list.append('no NOMINAL_DEPTH')

required_attributes = {'time_deployment_start', 'time_deployment_end'}
have_time_attributes = True
for attr in required_attributes - attributes:
error_list.append('no {} attribute'.format(attr))
have_time_attributes = False

# check for existence of in-water data
if have_time_attributes and not in_water_index(nc).any():
error_list.append('no in-water data')

# check qc flag conventions for VoI and depth/pressure
error_list.extend(check_imos_flag_conventions(nc))
Expand Down Expand Up @@ -153,3 +161,26 @@ def check_velocity_file(nc, site_code,

return check_file(nc, site_code, variables_of_interest=('UCUR', 'VCUR', 'WCUR'),
required_variables=required_variables, allowed_dimensions=allowed_dimensions)


def in_water_index(nc):
    """
    Return a boolean index of only the in-water data in a dataset.
    The in-water period is defined by the global attributes
    time_deployment_start and time_deployment_end (inclusive at both ends).

    :param nc: xarray dataset
    :return: boolean index array aligned with the TIME variable
    """
    # Strip the trailing "Z" (UTC designator) — np.datetime64 rejects it.
    deployment_start = np.datetime64(nc.attrs['time_deployment_start'][:-1])
    deployment_end = np.datetime64(nc.attrs['time_deployment_end'][:-1])
    timestamps = nc['TIME'][:]
    return (deployment_start <= timestamps) & (timestamps <= deployment_end)

def in_water(nc):
    """
    Cut data to in-water only timestamps, dropping the resulting NaN records.

    :param nc: xarray dataset
    :return: xarray dataset restricted to the in-water period
    """
    mask = in_water_index(nc)
    return nc.where(mask, drop=True)
21 changes: 3 additions & 18 deletions aodntools/timeseries_products/hourly_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@

from aodntools import __version__
from aodntools.timeseries_products import aggregated_timeseries as utils
from aodntools.timeseries_products.common import NoInputFilesError, check_file, get_qc_variable_names

from aodntools.timeseries_products.common import NoInputFilesError, check_file, get_qc_variable_names, in_water

TEMPLATE_JSON = resource_filename(__name__, 'hourly_timeseries_template.json')
BINNING_METHOD_JSON = resource_filename(__name__, 'binning_method.json')
Expand Down Expand Up @@ -66,19 +65,6 @@ def get_parameter_names(nc):
return params


def in_water(nc):
    """
    Restrict the dataset to in-water timestamps only, dropping records
    outside the deployment period (and any resulting NaN).

    :param nc: xarray dataset
    :return: xarray dataset
    """
    # Strip the trailing "Z" (UTC designator) so the strings parse as np.datetime64.
    deployment_start = np.datetime64(nc.attrs['time_deployment_start'][:-1])
    deployment_end = np.datetime64(nc.attrs['time_deployment_end'][:-1])
    timestamps = nc['TIME'][:]
    in_range = (deployment_start <= timestamps) & (timestamps <= deployment_end)
    return nc.where(in_range, drop=True)


def good_data_only(nc, qcflags):
"""
mask all the variables with QC for QC value less or equal than the specified
Expand Down Expand Up @@ -421,10 +407,9 @@ def hourly_aggregator(files_to_aggregate, site_code, qcflags, input_dir='', outp
'NOMINAL_DEPTH': get_nominal_depth(nc)},
ignore_index=True)

df_temp = nc_clean.to_dataframe()
# reindex in case TIME had out-of-range values before cleaning
df_temp = nc_clean.reindex({'TIME': nc_clean.TIME.values}).to_dataframe()

## keep TIME as the only index
df_temp = df_temp.reset_index().set_index('TIME')
df_temp = df_temp[parameter_names]

df_temp = PDresample_by_hour(df_temp, function_dict, function_stats) # do the magic
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def test_source_file_attributes(self):
self.assertEqual(dataset['source_file'].opendap_url_prefix, 'http://test.opendap.url')

def test_all_rejected(self):
self.assertRaises(NoInputFilesError, main_aggregator, [BAD_FILE], 'TEMP', 'NRSROT', input_dir=TEST_ROOT)
self.assertRaises(NoInputFilesError, main_aggregator, [BAD_FILE], 'TEMP', 'NRSROT',
input_dir=TEST_ROOT, output_dir='/tmp')


if __name__ == '__main__':
Expand Down
29 changes: 27 additions & 2 deletions test_aodntools/timeseries_products/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import xarray as xr

from aodntools.timeseries_products.common import (check_file, check_velocity_file, get_qc_variable_names,
check_imos_flag_conventions)
check_imos_flag_conventions, in_water_index, in_water)

TEST_ROOT = os.path.dirname(__file__)
GOOD_TZ_FILE = os.path.join(
Expand Down Expand Up @@ -80,7 +80,8 @@ def test_bad_velocity_file(self):
error_list = check_velocity_file(nc, 'NWSROW')
self.assertEqual(set(error_list), {'VCUR variable missing',
'DEPTH variable missing',
"dimension(s) {'DIST_ALONG_BEAMS'} not allowed for UCUR"
"dimension(s) {'DIST_ALONG_BEAMS'} not allowed for UCUR",
'no in-water data'
}
)

Expand All @@ -94,5 +95,29 @@ def test_am_file(self):
}
)


class TestInWater(unittest.TestCase):
    """Tests for the in_water_index() and in_water() helpers in common.py."""

    def test_in_water_index_ok(self):
        with xr.open_dataset(BAD_TZ_FILE) as nc:
            # override the deployment period so that the last two timestamps
            # in the file fall after time_deployment_end
            nc.attrs['time_deployment_start'] = '2018-12-13T08:00:00Z'
            nc.attrs['time_deployment_end'] = '2018-12-14T00:30:00Z'
            index = in_water_index(nc)
            self.assertTrue(all(index[:-2]))
            self.assertFalse(any(index[-2:]))

    def test_in_water_index_bad(self):
        # file whose deployment period excludes some of its timestamps
        with xr.open_dataset(BAD_V_FILE) as nc:
            index = in_water_index(nc)
            self.assertFalse(all(index))

    def test_in_water_ok(self):
        with xr.open_dataset(BAD_TZ_FILE) as nc:
            # same deployment period as above: the last two records are
            # out of the water and should be dropped by in_water()
            nc.attrs['time_deployment_start'] = '2018-12-13T08:00:00Z'
            nc.attrs['time_deployment_end'] = '2018-12-14T00:30:00Z'
            nc_in = in_water(nc)

            self.assertEqual(len(nc_in.TIME), len(nc.TIME) - 2)
            self.assertTrue(all(nc_in.TIME.values == nc.TIME[:-2].values))

if __name__ == '__main__':
unittest.main()
44 changes: 44 additions & 0 deletions test_aodntools/timeseries_products/test_hourly_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
BAD_FILE
]
INPUT_PATHS = [os.path.join(TEST_ROOT, f) for f in INPUT_FILES]
EXPECTED_OUTPUT_FILE = os.path.join(
TEST_ROOT, 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220404.nc'
)

INST_VARIABLES = {'instrument_id', 'source_file', 'LONGITUDE', 'LATITUDE', 'NOMINAL_DEPTH'}
OBS_VARIABLES = {'instrument_index', 'TIME'}
Expand All @@ -33,6 +36,17 @@
for s in function_stats:
OBS_VARIABLES.add(v + s)

NO_INWATER_DATA_FILE = 'IMOS_ANMN-NSW_TZ_PH100_NO_INWATER_DATA.nc'
PH100_FILES = [
'IMOS_ANMN-NSW_TZ_20200703T001500Z_PH100_FV01_PH100-2007-Aqualogger-520T-96_END-20200907T233000Z_C-20210112T044909Z.nc',
'IMOS_ANMN-NSW_TZ_PH100_ALL_FLAGGED_BAD.nc',
NO_INWATER_DATA_FILE
]

SYD100_FILES = [
'IMOS_ANMN-NSW_TZ_SYD100_BAD_TIMESTAMPS.nc',
]


class TestHourlyTimeseries(BaseTestCase):
def test_hourly_aggregator(self):
Expand Down Expand Up @@ -73,6 +87,15 @@ def test_hourly_aggregator(self):
self.assertIn('hourly_timeseries.py', dataset.lineage)
self.assertIn(BAD_FILE, dataset.rejected_files)

# check variable values
expected = Dataset(EXPECTED_OUTPUT_FILE)
compare_vars = ('TIME', 'NOMINAL_DEPTH', 'instrument_index',
'TEMP', 'TEMP_count', 'TEMP_min', 'TEMP_max')
non_match_vars = [var for var in compare_vars
if not all(dataset[var][:] == expected[var][:])
]
self.assertEqual(non_match_vars, [])

def test_hourly_aggregator_with_nonqc(self):
output_file, bad_files = hourly_aggregator(files_to_aggregate=INPUT_FILES,
site_code='NRSROT',
Expand All @@ -96,6 +119,27 @@ def test_hourly_aggregator_with_nonqc(self):
    def test_all_rejected(self):
        # when every input file fails the compatibility checks, the
        # aggregator has nothing to aggregate and must raise
        self.assertRaises(NoInputFilesError, hourly_aggregator, [BAD_FILE], 'NRSROT', (1, 2), input_dir=TEST_ROOT)

    def test_some_files_without_good_data(self):
        # One of the PH100 inputs has no timestamps within its deployment
        # period; it should be the only rejected file, flagged with the
        # 'no in-water data' error, while the other files aggregate fine.
        output_file, bad_files = hourly_aggregator(files_to_aggregate=PH100_FILES,
                                                   site_code='PH100',
                                                   qcflags=(1, 2),
                                                   input_dir=TEST_ROOT,
                                                   output_dir='/tmp'
                                                   )
        self.assertEqual(1, len(bad_files))
        for path, errors in bad_files.items():
            self.assertEqual(NO_INWATER_DATA_FILE, path)
            self.assertIn('no in-water data', errors)

    def test_bad_timestamps(self):
        # A file containing out-of-range timestamps should still be accepted
        # (presumably its out-of-water records are dropped during cleaning —
        # see the TIME reindex in hourly_aggregator), so no files are rejected.
        output_file, bad_files = hourly_aggregator(files_to_aggregate=SYD100_FILES,
                                                   site_code='SYD100',
                                                   qcflags=(1, 2),
                                                   input_dir=TEST_ROOT,
                                                   output_dir='/tmp'
                                                   )
        self.assertEqual(0, len(bad_files))


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def test_main_aggregator(self):
self.assertEqual(non_match_vars, [])

def test_all_rejected(self):
self.assertRaises(NoInputFilesError, velocity_aggregated, [BAD_FILE], 'NRSROT', input_dir=TEST_ROOT)
self.assertRaises(NoInputFilesError, velocity_aggregated, [BAD_FILE], 'NRSROT',
input_dir=TEST_ROOT, output_dir='/tmp')


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
--index-url https://pypi.python.org/simple/
-r requirements.txt
-e .[testing]
-e .[testing]