Skip to content

Commit

Permalink
datasets.find_datasets: added filtering by type
Browse files Browse the repository at this point in the history
  • Loading branch information
duncanmmacleod committed Mar 14, 2018
1 parent ca85102 commit 600f19b
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 7 deletions.
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ To search for available datasets (correct as of March 14 2018):
>>> from gwopensci import datasets
>>> datasets.find_datasets()
['GW150914', 'GW151226', 'GW170104', 'GW170608', 'GW170814', 'GW170817', 'LVT151012', 'O1', 'S5', 'S6']
>>> find_datasets('V1')
>>> datasets.find_datasets(detector='V1')
['GW170814', 'GW170817']
>>> datasets.find_datasets(type='run')
['O1', 'S5', 'S6']
To query for the GPS time of an event dataset (or vice-versa):

Expand Down
47 changes: 41 additions & 6 deletions gwopensci/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,58 @@
__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'


def find_datasets(detector=None, type=None, host=api.DEFAULT_URL):
    """Find datasets available on the given GW open science host

    Parameters
    ----------
    detector : `str`, optional
        prefix of GW detector; when given, only datasets whose
        ``'detectors'`` metadata contains this value are returned

    type : `str`, optional
        type of datasets to restrict, one of ``'run'`` or ``'event'``

    host : `str`, optional
        the URL of the LOSC host to query, defaults to losc.ligo.org

    Returns
    -------
    datasets : `list` of `str`
        the sorted names of all matched datasets, possibly empty

    Raises
    ------
    ValueError
        if ``type`` is not `None`, ``'run'``, or ``'event'``

    Examples
    --------
    (Correct as of 2018-03-14)

    >>> from gwopensci.datasets import find_datasets
    >>> find_datasets()
    ['GW150914', 'GW151226', 'GW170104', 'GW170608', 'GW170814', 'GW170817',
     'LVT151012', 'O1', 'S5', 'S6']
    >>> find_datasets(detector='V1')
    ['GW170814', 'GW170817']
    >>> find_datasets(type='event')
    ['GW150914', 'GW151226', 'GW170104', 'GW170608', 'GW170814', 'GW170817',
     'LVT151012']
    """
    # validate type before querying the host, so that a bad value fails fast
    if type not in [None, 'run', 'event']:
        raise ValueError('unrecognised type {!r}, select one of \'run\', '
                         '\'event\''.format(type))

    # the top-level keys of the metadata are matched against the plural
    # form of the type (e.g. 'run' -> 'runs'); keep it in its own name
    # rather than rebinding the ``type`` argument
    section = type + 's' if type else None

    # search
    meta = api.fetch_dataset_json(0, api.MAX_GPS, host=host)
    names = []
    for type_ in meta:
        if section and type_ != section:
            continue
        for epoch, metadata in meta[type_].items():
            # the 'tenyear' entry is never reported as a dataset
            if epoch == 'tenyear':
                continue
            if detector is None or detector in metadata['detectors']:
                names.append(epoch)
    return sorted(names)


def event_gps(event, host=api.DEFAULT_URL):
Expand Down
10 changes: 10 additions & 0 deletions gwopensci/test_gwopensci.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"""

import os.path
import re

import pytest

Expand Down Expand Up @@ -113,6 +114,15 @@ def test_find_datasets():

assert gwopensci_datasets.find_datasets('X1') == []

runsets = gwopensci_datasets.find_datasets(type='run')
assert 'O1' in runsets
run_regex = re.compile('\A[OS]\d+\Z')
for dset in runsets:
assert run_regex.match(dset)

with pytest.raises(ValueError):
gwopensci_datasets.find_datasets(type='badtype')


def test_event_gps():
assert gwopensci_datasets.event_gps('GW170817') == 1187008882.43
Expand Down

0 comments on commit 600f19b

Please sign in to comment.