diff --git a/bin/gwdetchar-conlog b/bin/gwdetchar-conlog index 76ba20320..a39b9adb8 100644 --- a/bin/gwdetchar-conlog +++ b/bin/gwdetchar-conlog @@ -45,7 +45,7 @@ parser = cli.create_parser(description=__doc__) cli.add_gps_start_stop_arguments(parser) cli.add_ifo_option(parser) cli.add_frametype_option(parser, required=const.IFO is None, - default='%s_T'.format(const.IFO)) + default='{}_T'.format(const.IFO)) cli.add_nproc_option(parser) parser.add_argument('-o', '--output', default='changes.csv', help='Path to output data file, default: %(default)s') diff --git a/bin/gwdetchar-lasso-correlation b/bin/gwdetchar-lasso-correlation index f437696fd..177a9f4b7 100644 --- a/bin/gwdetchar-lasso-correlation +++ b/bin/gwdetchar-lasso-correlation @@ -149,7 +149,7 @@ if args.band_pass: logger.info("-- Loading primary channel data") bandts = get_data( primary, start-pad, end+pad, verbose='Reading primary:'.rjust(30), - obs=args.ifo[0], frametype=args.primary_frametype, nproc=args.nproc) + frametype=args.primary_frametype, nproc=args.nproc) if flower < 0 or fupper >= float((bandts.sample_rate/2.).value): raise ValueError("bandpass frequency is out of range for this " "channel, band (Hz): {0}, sample rate: {1}".format( @@ -180,7 +180,7 @@ else: # load primary channel data logger.info("-- Loading primary channel data") primaryts = get_data(primary, start, end, frametype=args.primary_frametype, - obs=args.ifo[0], verbose='Reading:'.rjust(30), + verbose='Reading:'.rjust(30), nproc=args.nproc).crop(start, end) if args.remove_outliers: @@ -216,7 +216,7 @@ else: frametype = '%s_T' % args.ifo # for second trends auxdata = get_data( - channels, start, end, verbose='Reading:'.rjust(30), obs=args.ifo[0], + channels, start, end, verbose='Reading:'.rjust(30), frametype=frametype, nproc=args.nproc, pad=0).crop(start, end) # -- removes flat data to be re-introdused later diff --git a/bin/gwdetchar-scattering b/bin/gwdetchar-scattering index 6b5de6acf..fdc2e88e9 100644 --- a/bin/gwdetchar-scattering +++ b/bin/gwdetchar-scattering @@ -284,7 +284,7 @@ for i, seg in enumerate(statea): ) if args.verbose else False alldata.append( get_data(allchannels, seg[0], seg[1], frametype=args.frametype, - obs=args.ifo[0], verbose=msg, nproc=args.nproc).resample(128)) + verbose=msg, nproc=args.nproc).resample(128)) scatter_segments = DataQualityDict() actives = SegmentList() diff --git a/bin/gwdetchar-slow-correlation b/bin/gwdetchar-slow-correlation index ef6875a2a..b95ee8419 100644 --- a/bin/gwdetchar-slow-correlation +++ b/bin/gwdetchar-slow-correlation @@ -112,7 +112,7 @@ rcParams.update(tex_settings) # load data logger.info("-- Loading range data") rangets = get_data(rangechannel, start, end, frametype=args.range_frametype, - obs=args.ifo[0], verbose=True, nproc=args.nproc) + verbose=True, nproc=args.nproc) if args.trend_type == 'minute': dstart, dend = rangets.span @@ -121,7 +121,7 @@ else: dend = end logger.info("-- Loading h(t) data") -darmts = get_data(primary, dstart-pad, dend+pad, verbose=True, obs=args.ifo[0], +darmts = get_data(primary, dstart-pad, dend+pad, verbose=True, frametype=args.primary_frametype, nproc=args.nproc) # get darm BLRMS @@ -185,7 +185,7 @@ if args.trend_type == 'minute': else: frametype = '%s_T' % args.ifo # for second trends auxdata = get_data(map(str, channels), dstart, dend, verbose=True, pad=0, - obs=args.ifo[0], frametype=frametype, nproc=args.nproc) + frametype=frametype, nproc=args.nproc) gpsstub = '%d-%d' % (start, end-start) re_delim = re.compile('[:_-]') diff --git a/gwdetchar/io/datafind.py b/gwdetchar/io/datafind.py index 1d8ebbd89..157a0baa6 100644 --- a/gwdetchar/io/datafind.py +++ b/gwdetchar/io/datafind.py @@ -19,7 +19,9 @@ """gw_data_find wrappers """ +import re import warnings +from six.moves.urllib.error import HTTPError import gwdatafind @@ -118,7 +120,7 @@ def remove_missing_channels(channels, gwfcache): return list(keep) -def get_data(channel, start, end, obs=None, frametype=None, source=None, +def get_data(channel, start, end, frametype=None, source=None, nproc=1, verbose=False, **kwargs): """Retrieve data for given channels within a certain time range @@ -133,17 +135,12 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None, end : `float` GPS end time of requested data - obs : `str`, optional - single-letter name of observatory, defaults to the first letter of - `frametype` - frametype : `str`, optional - name of frametype in which channel(s) are stored, required if `source` - is `None` + name of frametype in which channel(s) are stored, default: `None` source : `str`, `list`, optional - `str` path(s) of a LAL-format cache file or individual data file, will - supercede `frametype` if given, defaults to `None` + path(s) of a LAL-format cache file or individual data file, + default: `None` nproc : `int`, optional number of parallel processes to use, uses serial process by default @@ -153,7 +150,7 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None, **kwargs : `dict`, optional additional keyword arguments to `~gwpy.timeseries.TimeSeries.read` - or `~gwpy.timeseries.TimeSeries.fetch` + or `~gwpy.timeseries.TimeSeries.get` Returns ------- @@ -166,35 +163,55 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None, If `channel` is a `str`, then a `TimeSeries` object will be returned, else the result is a `TimeSeriesDict`. + The `frametype` argument should be used to read from archived frame files, + while `source` should be used to read from a local cache or specific data + file. If either fails, or if neither is passed, this function will attempt + to get data over an NDS server. + + If `frametype` is used to read from the archive, any channels missing + from the first or last frame file in the requested time range will be + ignored. + See Also -------- - gwpy.timeseries.TimeSeries.fetch - for the underlying method to read from an NDS server + remove_missing_channels + a utility that removes channels missing from the frame archive + gwpy.timeseries.TimeSeries.get + the underlying method to read data over an NDS server gwpy.timeseries.TimeSeries.read - for the underlying method to read from a local file cache + the underlying method to read data from local files """ # get TimeSeries class if isinstance(channel, (list, tuple)): series_class = TimeSeriesDict else: series_class = TimeSeries - # construct file cache if none is given - if source is None: - obs = obs if obs is not None else frametype[0] - source = gwdatafind.find_urls(obs, frametype, start, end) - # read from frames or NDS - if source: - if isinstance(channel, (list, tuple)): - channel = remove_missing_channels(channel, source) + + if frametype is not None: + try: # locate frame files + ifo = re.search('[A-Z]1', frametype).group(0) + obs = ifo[0] + source = gwdatafind.find_urls(obs, frametype, start, end) + except AttributeError: + raise AttributeError( + 'Could not determine observatory from frametype') + except HTTPError: # frame files not found + pass + if isinstance(source, list) and isinstance(channel, (list, tuple)): + channel = remove_missing_channels(channel, source) + if source is not None: # read from frame files return series_class.read( source, channel, start=start, end=end, nproc=nproc, verbose=verbose, **kwargs) - elif isinstance(channel, str): - return series_class.fetch( + + # read single channel from NDS + if not isinstance(channel, (list, tuple)): + return series_class.get( channel, start, end, verbose=verbose, **kwargs) + # if all else fails, process channels in groups of 60 data = series_class() for group in [channel[i:i + 60] for i in range(0, len(channel), 60)]: - data.append(series_class.fetch( + data.append(series_class.get( group, start, end, verbose=verbose, **kwargs)) return data diff --git a/gwdetchar/io/tests/test_datafind.py b/gwdetchar/io/tests/test_datafind.py index 03a97f89d..8e653271f 100644 --- a/gwdetchar/io/tests/test_datafind.py +++ b/gwdetchar/io/tests/test_datafind.py @@ -23,6 +23,7 @@ import numpy from numpy import testing as nptest +from six.moves.urllib.error import HTTPError from gwpy.testing.compat import mock from gwpy.timeseries import (TimeSeries, TimeSeriesDict) @@ -65,41 +66,46 @@ def test_remove_missing_channels(io_gwf): assert channels == ['X1:TEST-STRAIN'] -@mock.patch('gwpy.timeseries.TimeSeries.fetch', return_value=HOFT) -def test_get_data_from_NDS(tsfetch): +@mock.patch('gwpy.timeseries.TimeSeries.get', return_value=HOFT) +def test_get_data_from_NDS(tsget): # retrieve data start = 0 end = 64 channel = 'X1:TEST-STRAIN' - data = datafind.get_data(channel, start, end, source=0) + data = datafind.get_data(channel, start, end) # test data products assert isinstance(data, TimeSeries) nptest.assert_array_equal(data.value, HOFT.value) -@mock.patch('gwpy.timeseries.TimeSeriesDict.fetch', +@mock.patch('gwpy.timeseries.TimeSeriesDict.get', return_value=TimeSeriesDict({'X1:TEST-STRAIN': HOFT})) -def test_get_data_dict_from_NDS(tsdfetch): +def test_get_data_dict_from_NDS(tsdget): # retrieve data start = 33 end = 64 channels = ['X1:TEST-STRAIN'] - data = datafind.get_data(channels, start, end, source=0) + data = datafind.get_data(channels, start, end) # test data products assert isinstance(data, TimeSeriesDict) nptest.assert_array_equal(data['X1:TEST-STRAIN'].value, HOFT.value) -@mock.patch('gwpy.timeseries.TimeSeries.read', - return_value=HOFT.crop(16, 48)) -def test_get_data_from_cache(tsfetch): +@mock.patch('gwdatafind.find_urls') +@mock.patch('gwpy.timeseries.TimeSeries.read') +def test_get_data_from_cache(tsget, find_data): + # set return values + find_data.return_value = ['test.gwf'] + tsget.return_value = HOFT.crop(16, 48) + # retrieve test frame start = 16 end = start + 32 channel = 'X1:TEST-STRAIN' - data = datafind.get_data(channel, start, end, source=True) + frametype = 'X1_TEST' + data = datafind.get_data(channel, start, end, frametype=frametype) # test data products assert isinstance(data, TimeSeries) @@ -108,13 +114,15 @@ def test_get_data_from_cache(tsfetch): nptest.assert_array_equal(data.value, HOFT.crop(start, end).value) +@mock.patch('gwdatafind.find_urls') @mock.patch('gwdetchar.io.datafind.remove_missing_channels') @mock.patch('gwpy.timeseries.TimeSeriesDict.read') -def test_get_data_dict_from_cache(tsdfetch, remove): +def test_get_data_dict_from_cache(tsdget, remove, find_data): # set return values - tsdfetch.return_value = TimeSeriesDict({ + tsdget.return_value = TimeSeriesDict({ 'X1:TEST-STRAIN': HOFT.crop(16, 48)}) remove.return_value = ['X1:TEST-STRAIN'] + find_data.return_value = ['test.gwf'] # retrieve test frame start = 16 end = start + 32 @@ -129,7 +137,8 @@ def test_get_data_dict_from_cache(tsdfetch, remove): HOFT.crop(start, end).value) -def test_fail_on_no_frametype(): +def test_get_data_bad_frametype(): channel = 'X1:TEST-STRAIN' - with pytest.raises(TypeError): - datafind.get_data(channel, start=0, end=32) + with pytest.raises(AttributeError) as exc: + datafind.get_data(channel, start=0, end=32, frametype='bad_frametype') + assert 'Could not determine observatory' in str(exc.value)