Skip to content

Commit

Permalink
Merge ca4a2e0 into a801697
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex L. Urban committed Apr 5, 2019
2 parents a801697 + ca4a2e0 commit 0d5f34a
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 47 deletions.
2 changes: 1 addition & 1 deletion bin/gwdetchar-conlog
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ parser = cli.create_parser(description=__doc__)
cli.add_gps_start_stop_arguments(parser)
cli.add_ifo_option(parser)
cli.add_frametype_option(parser, required=const.IFO is None,
default='%s_T'.format(const.IFO))
default='{}_T'.format(const.IFO))
cli.add_nproc_option(parser)
parser.add_argument('-o', '--output', default='changes.csv',
help='Path to output data file, default: %(default)s')
Expand Down
6 changes: 3 additions & 3 deletions bin/gwdetchar-lasso-correlation
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ if args.band_pass:
logger.info("-- Loading primary channel data")
bandts = get_data(
primary, start-pad, end+pad, verbose='Reading primary:'.rjust(30),
obs=args.ifo[0], frametype=args.primary_frametype, nproc=args.nproc)
frametype=args.primary_frametype, nproc=args.nproc)
if flower < 0 or fupper >= float((bandts.sample_rate/2.).value):
raise ValueError("bandpass frequency is out of range for this "
"channel, band (Hz): {0}, sample rate: {1}".format(
Expand Down Expand Up @@ -180,7 +180,7 @@ else:
# load primary channel data
logger.info("-- Loading primary channel data")
primaryts = get_data(primary, start, end, frametype=args.primary_frametype,
obs=args.ifo[0], verbose='Reading:'.rjust(30),
verbose='Reading:'.rjust(30),
nproc=args.nproc).crop(start, end)

if args.remove_outliers:
Expand Down Expand Up @@ -216,7 +216,7 @@ else:
frametype = '%s_T' % args.ifo # for second trends

auxdata = get_data(
channels, start, end, verbose='Reading:'.rjust(30), obs=args.ifo[0],
channels, start, end, verbose='Reading:'.rjust(30),
frametype=frametype, nproc=args.nproc, pad=0).crop(start, end)

# -- removes flat data to be re-introduced later
Expand Down
2 changes: 1 addition & 1 deletion bin/gwdetchar-scattering
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ for i, seg in enumerate(statea):
) if args.verbose else False
alldata.append(
get_data(allchannels, seg[0], seg[1], frametype=args.frametype,
obs=args.ifo[0], verbose=msg, nproc=args.nproc).resample(128))
verbose=msg, nproc=args.nproc).resample(128))

scatter_segments = DataQualityDict()
actives = SegmentList()
Expand Down
6 changes: 3 additions & 3 deletions bin/gwdetchar-slow-correlation
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ rcParams.update(tex_settings)
# load data
logger.info("-- Loading range data")
rangets = get_data(rangechannel, start, end, frametype=args.range_frametype,
obs=args.ifo[0], verbose=True, nproc=args.nproc)
verbose=True, nproc=args.nproc)

if args.trend_type == 'minute':
dstart, dend = rangets.span
Expand All @@ -121,7 +121,7 @@ else:
dend = end

logger.info("-- Loading h(t) data")
darmts = get_data(primary, dstart-pad, dend+pad, verbose=True, obs=args.ifo[0],
darmts = get_data(primary, dstart-pad, dend+pad, verbose=True,
frametype=args.primary_frametype, nproc=args.nproc)

# get darm BLRMS
Expand Down Expand Up @@ -185,7 +185,7 @@ if args.trend_type == 'minute':
else:
frametype = '%s_T' % args.ifo # for second trends
auxdata = get_data(map(str, channels), dstart, dend, verbose=True, pad=0,
obs=args.ifo[0], frametype=frametype, nproc=args.nproc)
frametype=frametype, nproc=args.nproc)

gpsstub = '%d-%d' % (start, end-start)
re_delim = re.compile('[:_-]')
Expand Down
65 changes: 41 additions & 24 deletions gwdetchar/io/datafind.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
"""gw_data_find wrappers
"""

import re
import warnings
from six.moves.urllib.error import HTTPError

import gwdatafind

Expand Down Expand Up @@ -118,7 +120,7 @@ def remove_missing_channels(channels, gwfcache):
return list(keep)


def get_data(channel, start, end, obs=None, frametype=None, source=None,
def get_data(channel, start, end, frametype=None, source=None,
nproc=1, verbose=False, **kwargs):
"""Retrieve data for given channels within a certain time range
Expand All @@ -133,17 +135,12 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
end : `float`
GPS end time of requested data
obs : `str`, optional
single-letter name of observatory, defaults to the first letter of
`frametype`
frametype : `str`, optional
name of frametype in which channel(s) are stored, required if `source`
is `None`
name of frametype in which channel(s) are stored, default: `None`
source : `str`, `list`, optional
`str` path(s) of a LAL-format cache file or individual data file, will
supersede `frametype` if given, defaults to `None`
path(s) of a LAL-format cache file or individual data file,
default: `None`
nproc : `int`, optional
number of parallel processes to use, uses serial process by default
Expand All @@ -153,7 +150,7 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
**kwargs : `dict`, optional
additional keyword arguments to `~gwpy.timeseries.TimeSeries.read`
or `~gwpy.timeseries.TimeSeries.fetch`
or `~gwpy.timeseries.TimeSeries.get`
Returns
-------
Expand All @@ -166,35 +163,55 @@ def get_data(channel, start, end, obs=None, frametype=None, source=None,
If `channel` is a `str`, then a `TimeSeries` object will be returned, else
the result is a `TimeSeriesDict`.
The `frametype` argument should be used to read from archived frame files,
while `source` should be used to read from a local cache or specific data
file. If either fails, or if neither is passed, this function will attempt
to get data over an NDS server.
If `frametype` is used to read from the archive, any channels missing
from the first or last frame file in the requested time range will be
ignored.
See Also
--------
gwpy.timeseries.TimeSeries.fetch
for the underlying method to read from an NDS server
remove_missing_channels
a utility that removes channels missing from the frame archive
gwpy.timeseries.TimeSeries.get
the underlying method to read data over an NDS server
gwpy.timeseries.TimeSeries.read
for the underlying method to read from a local file cache
the underlying method to read data from local files
"""
# get TimeSeries class
if isinstance(channel, (list, tuple)):
series_class = TimeSeriesDict
else:
series_class = TimeSeries
# construct file cache if none is given
if source is None:
obs = obs if obs is not None else frametype[0]
source = gwdatafind.find_urls(obs, frametype, start, end)
# read from frames or NDS
if source:
if isinstance(channel, (list, tuple)):
channel = remove_missing_channels(channel, source)

if frametype is not None:
try: # locate frame files
ifo = re.search('[A-Z]1', frametype).group(0)
obs = ifo[0]
source = gwdatafind.find_urls(obs, frametype, start, end)
except AttributeError:
raise AttributeError(
'Could not determine observatory from frametype')
except HTTPError: # frame files not found
pass
if isinstance(source, list) and isinstance(channel, (list, tuple)):
channel = remove_missing_channels(channel, source)
if source is not None: # read from frame files
return series_class.read(
source, channel, start=start, end=end, nproc=nproc,
verbose=verbose, **kwargs)
elif isinstance(channel, str):
return series_class.fetch(

# read single channel from NDS
if not isinstance(channel, (list, tuple)):
return series_class.get(
channel, start, end, verbose=verbose, **kwargs)

# if all else fails, process channels in groups of 60
data = series_class()
for group in [channel[i:i + 60] for i in range(0, len(channel), 60)]:
data.append(series_class.fetch(
data.append(series_class.get(
group, start, end, verbose=verbose, **kwargs))
return data
39 changes: 24 additions & 15 deletions gwdetchar/io/tests/test_datafind.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import numpy
from numpy import testing as nptest
from six.moves.urllib.error import HTTPError

from gwpy.testing.compat import mock
from gwpy.timeseries import (TimeSeries, TimeSeriesDict)
Expand Down Expand Up @@ -65,41 +66,46 @@ def test_remove_missing_channels(io_gwf):
assert channels == ['X1:TEST-STRAIN']


@mock.patch('gwpy.timeseries.TimeSeries.fetch', return_value=HOFT)
def test_get_data_from_NDS(tsfetch):
@mock.patch('gwpy.timeseries.TimeSeries.get', return_value=HOFT)
def test_get_data_from_NDS(tsget):
# retrieve data
start = 0
end = 64
channel = 'X1:TEST-STRAIN'
data = datafind.get_data(channel, start, end, source=0)
data = datafind.get_data(channel, start, end)

# test data products
assert isinstance(data, TimeSeries)
nptest.assert_array_equal(data.value, HOFT.value)


@mock.patch('gwpy.timeseries.TimeSeriesDict.fetch',
@mock.patch('gwpy.timeseries.TimeSeriesDict.get',
return_value=TimeSeriesDict({'X1:TEST-STRAIN': HOFT}))
def test_get_data_dict_from_NDS(tsdfetch):
def test_get_data_dict_from_NDS(tsdget):
# retrieve data
start = 33
end = 64
channels = ['X1:TEST-STRAIN']
data = datafind.get_data(channels, start, end, source=0)
data = datafind.get_data(channels, start, end)

# test data products
assert isinstance(data, TimeSeriesDict)
nptest.assert_array_equal(data['X1:TEST-STRAIN'].value, HOFT.value)


@mock.patch('gwpy.timeseries.TimeSeries.read',
return_value=HOFT.crop(16, 48))
def test_get_data_from_cache(tsfetch):
@mock.patch('gwdatafind.find_urls')
@mock.patch('gwpy.timeseries.TimeSeries.read')
def test_get_data_from_cache(tsget, find_data):
# set return values
find_data.return_value = ['test.gwf']
tsget.return_value = HOFT.crop(16, 48)

# retrieve test frame
start = 16
end = start + 32
channel = 'X1:TEST-STRAIN'
data = datafind.get_data(channel, start, end, source=True)
frametype = 'X1_TEST'
data = datafind.get_data(channel, start, end, frametype=frametype)

# test data products
assert isinstance(data, TimeSeries)
Expand All @@ -108,13 +114,15 @@ def test_get_data_from_cache(tsfetch):
nptest.assert_array_equal(data.value, HOFT.crop(start, end).value)


@mock.patch('gwdatafind.find_urls')
@mock.patch('gwdetchar.io.datafind.remove_missing_channels')
@mock.patch('gwpy.timeseries.TimeSeriesDict.read')
def test_get_data_dict_from_cache(tsdfetch, remove):
def test_get_data_dict_from_cache(tsdget, remove, find_data):
# set return values
tsdfetch.return_value = TimeSeriesDict({
tsdget.return_value = TimeSeriesDict({
'X1:TEST-STRAIN': HOFT.crop(16, 48)})
remove.return_value = ['X1:TEST-STRAIN']
find_data.return_value = ['test.gwf']
# retrieve test frame
start = 16
end = start + 32
Expand All @@ -129,7 +137,8 @@ def test_get_data_dict_from_cache(tsdfetch, remove):
HOFT.crop(start, end).value)


def test_fail_on_no_frametype():
def test_get_data_bad_frametype():
channel = 'X1:TEST-STRAIN'
with pytest.raises(TypeError):
datafind.get_data(channel, start=0, end=32)
with pytest.raises(AttributeError) as exc:
datafind.get_data(channel, start=0, end=32, frametype='bad_frametype')
assert 'Could not determine observatory' in str(exc.value)

0 comments on commit 0d5f34a

Please sign in to comment.