Skip to content

Commit

Permalink
Merge ca4a2e0 into a801697
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex L. Urban committed Apr 5, 2019
2 parents a801697 + ca4a2e0 commit 0d5f34a
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 47 deletions.
2 changes: 1 addition & 1 deletion bin/gwdetchar-conlog
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ parser = cli.create_parser(description=__doc__)
cli.add_gps_start_stop_arguments(parser)
cli.add_ifo_option(parser)
cli.add_frametype_option(parser, required=const.IFO is None,
default='%s_T'.format(const.IFO))
default='{}_T'.format(const.IFO))
cli.add_nproc_option(parser)
parser.add_argument('-o', '--output', default='changes.csv',
help='Path to output data file, default: %(default)s')
Expand Down
6 changes: 3 additions & 3 deletions bin/gwdetchar-lasso-correlation
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ if args.band_pass:
logger.info("-- Loading primary channel data")
bandts = get_data(
primary, start-pad, end+pad, verbose='Reading primary:'.rjust(30),
obs=args.ifo[0], frametype=args.primary_frametype, nproc=args.nproc)
frametype=args.primary_frametype, nproc=args.nproc)
if flower < 0 or fupper >= float((bandts.sample_rate/2.).value):
raise ValueError("bandpass frequency is out of range for this "
"channel, band (Hz): {0}, sample rate: {1}".format(
Expand Down Expand Up @@ -180,7 +180,7 @@ else:
# load primary channel data
logger.info("-- Loading primary channel data")
primaryts = get_data(primary, start, end, frametype=args.primary_frametype,
obs=args.ifo[0], verbose='Reading:'.rjust(30),
verbose='Reading:'.rjust(30),
nproc=args.nproc).crop(start, end)

if args.remove_outliers:
Expand Down Expand Up @@ -216,7 +216,7 @@ else:
frametype = '%s_T' % args.ifo # for second trends

auxdata = get_data(
channels, start, end, verbose='Reading:'.rjust(30), obs=args.ifo[0],
channels, start, end, verbose='Reading:'.rjust(30),
frametype=frametype, nproc=args.nproc, pad=0).crop(start, end)

# -- removes flat data to be re-introduced later
Expand Down
2 changes: 1 addition & 1 deletion bin/gwdetchar-scattering
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ for i, seg in enumerate(statea):
) if args.verbose else False
alldata.append(
get_data(allchannels, seg[0], seg[1], frametype=args.frametype,
obs=args.ifo[0], verbose=msg, nproc=args.nproc).resample(128))
verbose=msg, nproc=args.nproc).resample(128))

scatter_segments = DataQualityDict()
actives = SegmentList()
Expand Down
6 changes: 3 additions & 3 deletions bin/gwdetchar-slow-correlation
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ rcParams.update(tex_settings)
# load data
logger.info("-- Loading range data")
rangets = get_data(rangechannel, start, end, frametype=args.range_frametype,
obs=args.ifo[0], verbose=True, nproc=args.nproc)
verbose=True, nproc=args.nproc)

if args.trend_type == 'minute':
dstart, dend = rangets.span
Expand All @@ -121,7 +121,7 @@ else:
dend = end

logger.info("-- Loading h(t) data")
darmts = get_data(primary, dstart-pad, dend+pad, verbose=True, obs=args.ifo[0],
darmts = get_data(primary, dstart-pad, dend+pad, verbose=True,
frametype=args.primary_frametype, nproc=args.nproc)

# get darm BLRMS
Expand Down Expand Up @@ -185,7 +185,7 @@ if args.trend_type == 'minute':
else:
frametype = '%s_T' % args.ifo # for second trends
auxdata = get_data(map(str, channels), dstart, dend, verbose=True, pad=0,
obs=args.ifo[0], frametype=frametype, nproc=args.nproc)
frametype=frametype, nproc=args.nproc)

gpsstub = '%d-%d' % (start, end-start)
re_delim = re.compile('[:_-]')
Expand Down
65 changes: 41 additions & 24 deletions gwdetchar/io/datafind.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
"""gw_data_find wrappers
"""

import re
import warnings
from six.moves.urllib.error import HTTPError

import gwdatafind

Expand Down Expand Up @@ -118,7 +120,7 @@ def remove_missing_channels(channels, gwfcache):
return list(keep)


def get_data(channel, start, end, obs=None, frametype=None, source=None,
def get_data(channel, start, end, frametype=None, source=None,
nproc=1, verbose=False, **kwargs):
"""Retrieve data for given channels within a certain time range
Expand All @@ -133,17 +135,12 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
end : `float`
GPS end time of requested data
obs : `str`, optional
single-letter name of observatory, defaults to the first letter of
`frametype`
frametype : `str`, optional
name of frametype in which channel(s) are stored, required if `source`
is `None`
name of frametype in which channel(s) are stored, default: `None`
source : `str`, `list`, optional
`str` path(s) of a LAL-format cache file or individual data file, will
supersede `frametype` if given, defaults to `None`
path(s) of a LAL-format cache file or individual data file,
default: `None`
nproc : `int`, optional
number of parallel processes to use, uses serial process by default
Expand All @@ -153,7 +150,7 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
**kwargs : `dict`, optional
additional keyword arguments to `~gwpy.timeseries.TimeSeries.read`
or `~gwpy.timeseries.TimeSeries.fetch`
or `~gwpy.timeseries.TimeSeries.get`
Returns
-------
Expand All @@ -166,35 +163,55 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
If `channel` is a `str`, then a `TimeSeries` object will be returned, else
the result is a `TimeSeriesDict`.
The `frametype` argument should be used to read from archived frame files,
while `source` should be used to read from a local cache or specific data
file. If either fails, or if neither is passed, this function will attempt
to get data over an NDS server.
If `frametype` is used to read from the archive, any channels missing
from the first or last frame file in the requested time range will be
ignored.
See Also
--------
gwpy.timeseries.TimeSeries.fetch
for the underlying method to read from an NDS server
remove_missing_channels
a utility that removes channels missing from the frame archive
gwpy.timeseries.TimeSeries.get
the underlying method to read data over an NDS server
gwpy.timeseries.TimeSeries.read
for the underlying method to read from a local file cache
the underlying method to read data from local files
"""
# get TimeSeries class
if isinstance(channel, (list, tuple)):
series_class = TimeSeriesDict
else:
series_class = TimeSeries
# construct file cache if none is given
if source is None:
obs = obs if obs is not None else frametype[0]
source = gwdatafind.find_urls(obs, frametype, start, end)
# read from frames or NDS
if source:
if isinstance(channel, (list, tuple)):
channel = remove_missing_channels(channel, source)

if frametype is not None:
try: # locate frame files
ifo = re.search('[A-Z]1', frametype).group(0)
obs = ifo[0]
source = gwdatafind.find_urls(obs, frametype, start, end)
except AttributeError:
raise AttributeError(
'Could not determine observatory from frametype')
except HTTPError: # frame files not found
pass
if isinstance(source, list) and isinstance(channel, (list, tuple)):
channel = remove_missing_channels(channel, source)
if source is not None: # read from frame files
return series_class.read(
source, channel, start=start, end=end, nproc=nproc,
verbose=verbose, **kwargs)
elif isinstance(channel, str):
return series_class.fetch(

# read single channel from NDS
if not isinstance(channel, (list, tuple)):
return series_class.get(
channel, start, end, verbose=verbose, **kwargs)

# if all else fails, process channels in groups of 60
data = series_class()
for group in [channel[i:i + 60] for i in range(0, len(channel), 60)]:
data.append(series_class.fetch(
data.append(series_class.get(
group, start, end, verbose=verbose, **kwargs))
return data
39 changes: 24 additions & 15 deletions gwdetchar/io/tests/test_datafind.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import numpy
from numpy import testing as nptest
from six.moves.urllib.error import HTTPError

from gwpy.testing.compat import mock
from gwpy.timeseries import (TimeSeries, TimeSeriesDict)
Expand Down Expand Up @@ -65,41 +66,46 @@ def test_remove_missing_channels(io_gwf):
assert channels == ['X1:TEST-STRAIN']


@mock.patch('gwpy.timeseries.TimeSeries.fetch', return_value=HOFT)
def test_get_data_from_NDS(tsfetch):
@mock.patch('gwpy.timeseries.TimeSeries.get', return_value=HOFT)
def test_get_data_from_NDS(tsget):
# retrieve data
start = 0
end = 64
channel = 'X1:TEST-STRAIN'
data = datafind.get_data(channel, start, end, source=0)
data = datafind.get_data(channel, start, end)

# test data products
assert isinstance(data, TimeSeries)
nptest.assert_array_equal(data.value, HOFT.value)


@mock.patch('gwpy.timeseries.TimeSeriesDict.fetch',
@mock.patch('gwpy.timeseries.TimeSeriesDict.get',
return_value=TimeSeriesDict({'X1:TEST-STRAIN': HOFT}))
def test_get_data_dict_from_NDS(tsdfetch):
def test_get_data_dict_from_NDS(tsdget):
# retrieve data
start = 33
end = 64
channels = ['X1:TEST-STRAIN']
data = datafind.get_data(channels, start, end, source=0)
data = datafind.get_data(channels, start, end)

# test data products
assert isinstance(data, TimeSeriesDict)
nptest.assert_array_equal(data['X1:TEST-STRAIN'].value, HOFT.value)


@mock.patch('gwpy.timeseries.TimeSeries.read',
return_value=HOFT.crop(16, 48))
def test_get_data_from_cache(tsfetch):
@mock.patch('gwdatafind.find_urls')
@mock.patch('gwpy.timeseries.TimeSeries.read')
def test_get_data_from_cache(tsget, find_data):
# set return values
find_data.return_value = ['test.gwf']
tsget.return_value = HOFT.crop(16, 48)

# retrieve test frame
start = 16
end = start + 32
channel = 'X1:TEST-STRAIN'
data = datafind.get_data(channel, start, end, source=True)
frametype = 'X1_TEST'
data = datafind.get_data(channel, start, end, frametype=frametype)

# test data products
assert isinstance(data, TimeSeries)
Expand All @@ -108,13 +114,15 @@ def test_get_data_from_cache(tsfetch):
nptest.assert_array_equal(data.value, HOFT.crop(start, end).value)


@mock.patch('gwdatafind.find_urls')
@mock.patch('gwdetchar.io.datafind.remove_missing_channels')
@mock.patch('gwpy.timeseries.TimeSeriesDict.read')
def test_get_data_dict_from_cache(tsdfetch, remove):
def test_get_data_dict_from_cache(tsdget, remove, find_data):
# set return values
tsdfetch.return_value = TimeSeriesDict({
tsdget.return_value = TimeSeriesDict({
'X1:TEST-STRAIN': HOFT.crop(16, 48)})
remove.return_value = ['X1:TEST-STRAIN']
find_data.return_value = ['test.gwf']
# retrieve test frame
start = 16
end = start + 32
Expand All @@ -129,7 +137,8 @@ def test_get_data_dict_from_cache(tsdfetch, remove):
HOFT.crop(start, end).value)


def test_fail_on_no_frametype():
def test_get_data_bad_frametype():
channel = 'X1:TEST-STRAIN'
with pytest.raises(TypeError):
datafind.get_data(channel, start=0, end=32)
with pytest.raises(AttributeError) as exc:
datafind.get_data(channel, start=0, end=32, frametype='bad_frametype')
assert 'Could not determine observatory' in str(exc.value)

0 comments on commit 0d5f34a

Please sign in to comment.