Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Fido client and filetype attr #31

Merged
merged 18 commits into from
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: |
pip install -e .[test,docs]
- name: Test
run: pytest
run: pytest --remote-data=any
- name: Build docs
run: |
cd docs
Expand Down
8 changes: 8 additions & 0 deletions docs/code_reference/eispac_net.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
eispac net
==========

.. automodapi:: eispac.net
:no-heading:

.. automodapi:: eispac.net.attrs
:headings: ^"
3 changes: 3 additions & 0 deletions eispac/net/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .client import *

__all__ = ['EISClient']
28 changes: 28 additions & 0 deletions eispac/net/attrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from sunpy.net.attr import SimpleAttr

__all__ = ['FileType']


class FileType(SimpleAttr):
    """
    Specifies the type of EIS level 1 file

    Parameters
    ----------
    value: `str`
        Possible values are "HDF5 data" or "HDF5 header" to retrieve the
        data and header files, respectively, in HDF5 format, or "FITS" to
        retrieve the FITS files. Inputs are not case sensitive. The
        normalized forms "data.h5", "head.h5" and "fits" are accepted too.

    Raises
    ------
    ValueError
        If ``value`` is not a string, or does not name one of the supported
        file types.
    """

    def __init__(self, value):
        if not isinstance(value, str):
            raise ValueError('File type must be a string')
        # Work on a separate name so the error message below can quote what
        # the caller actually passed rather than the rewritten form.
        normalized = value.lower()
        if 'hdf5' in normalized:
            # "hdf5 data" -> "data.h5": drop the first five characters
            # ("hdf5" plus its separator) and append the file extension.
            normalized = '.'.join([normalized[5:], 'h5'])
        if normalized == 'header.h5':
            # The header files use the shortened "head" in their names.
            normalized = 'head.h5'
        if normalized not in ('data.h5', 'head.h5', 'fits'):
            raise ValueError(f'File type {value!r} must be either "HDF5 data", "HDF5 header", or "FITS".')
        super().__init__(normalized)
137 changes: 137 additions & 0 deletions eispac/net/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from sunpy.net import attrs as a
from sunpy.net.dataretriever import GenericClient, QueryResponse
from sunpy.net.scraper import Scraper
from sunpy.time import TimeRange

from eispac.net.attrs import FileType

__all__ = ['EISClient']


class EISClient(GenericClient):
"""
Provides access to the level 1 EIS data in HDF5 and FITS format.

This data is hosted by the `Naval Research Laboratory <https://eis.nrl.navy.mil/>`__.

Examples
--------
>>> from sunpy.net import Fido, attrs as a
>>> import eispac.net
>>> from eispac.net.attrs import FileType
>>> results = Fido.search(a.Time('2020-11-09 00:00:00','2020-11-09 01:00:00'),
... a.Instrument('EIS'),
... a.Physobs.intensity,
... a.Source('Hinode'),
... a.Provider('NRL'),
... a.Level('1')) #doctest: +REMOTE_DATA
>>> results #doctest: +REMOTE_DATA
<sunpy.net.fido_factory.UnifiedResponse object at ...>
Results from 1 Provider:
<BLANKLINE>
3 Results from the EISClient:
Source: https://eis.nrl.navy.mil/
<BLANKLINE>
Start Time End Time ... Level FileType
----------------------- ----------------------- ... ----- -----------
2020-11-09 00:10:12.000 2020-11-09 00:10:12.999 ... 1 HDF5 data
2020-11-09 00:10:12.000 2020-11-09 00:10:12.999 ... 1 HDF5 header
2020-11-09 00:10:12.000 2020-11-09 00:10:12.999 ... 1 FITS
<BLANKLINE>
<BLANKLINE>
>>> results = Fido.search(a.Time('2020-11-09 00:00:00','2020-11-09 01:00:00'),
... a.Instrument('EIS'),
... a.Physobs.intensity,
... a.Source('Hinode'),
... a.Provider('NRL'),
... a.Level('1'),
... FileType('HDF5 header')) #doctest: +REMOTE_DATA
>>> results #doctest: +REMOTE_DATA
<sunpy.net.fido_factory.UnifiedResponse object at ...>
Results from 1 Provider:
<BLANKLINE>
1 Results from the EISClient:
Source: https://eis.nrl.navy.mil/
<BLANKLINE>
Start Time End Time ... Level FileType
----------------------- ----------------------- ... ----- -----------
2020-11-09 00:10:12.000 2020-11-09 00:10:12.999 ... 1 HDF5 header
<BLANKLINE>
<BLANKLINE>
"""
# URL templates and matching extractor patterns, one pair per archive layout.
# `search` passes a `baseurl_*` to `Scraper(..., regex=True)` and the
# corresponding `pattern_*` as the `extractor` used to pull the named fields
# (year, month, day, hour, minute, second, FileType) out of each file URL.
# NOTE(review): with regex=True the dots before `(\w){4}` and `h5` are
# unescaped and so match any character -- confirm this is intended.
baseurl_hdf5 = r'https://eis.nrl.navy.mil/level1/hdf5/%Y/%m/%d/eis_%Y%m%d_%H%M%S.(\w){4}.h5'
pattern_hdf5 = '{}/{year:4d}/{month:2d}/{day:2d}/eis_{:8d}_{hour:2d}{minute:2d}{second:2d}.{FileType}'
# FITS files live in a separate directory tree and carry an `eis_er_` prefix.
baseurl_fits = r'https://eis.nrl.navy.mil/level1/fits/%Y/%m/%d/eis_er_%Y%m%d_%H%M%S.fits'
pattern_fits = '{}/{year:4d}/{month:2d}/{day:2d}/eis_er_{:8d}_{hour:2d}{minute:2d}{second:2d}.{FileType}'

@property
def info_url(self):
wtbarnes marked this conversation as resolved.
Show resolved Hide resolved
return 'https://eis.nrl.navy.mil/'

def search(self, *args, **kwargs):
    """
    Query the archive for every requested file type.

    The base class search is overridden because the URL template and the
    extractor pattern both depend on the file type; running one scrape per
    file type lets a single query return several file types at once.

    Returns
    -------
    `~sunpy.net.dataretriever.QueryResponse`
        One row per matching file, with this client attached.
    """
    query = self._get_match_dict(*args, **kwargs)
    rows = []
    for file_type in query.get('FileType'):
        # HDF5 and FITS files live under different URL layouts.
        if 'h5' in file_type:
            url_template, extractor = self.baseurl_hdf5, self.pattern_hdf5
        else:
            url_template, extractor = self.baseurl_fits, self.pattern_fits

        url_scraper = Scraper(url_template, regex=True)
        time_range = TimeRange(query['Start Time'], query['End Time'])
        found = url_scraper._extract_files_meta(
            time_range, extractor=extractor, matcher={'FileType': file_type})
        # Rows for a given file type are ordered by URL.
        rows.extend(self.post_search_hook(file_meta, query)
                    for file_meta in sorted(found, key=lambda meta: meta['url']))

    return QueryResponse(rows, client=self)

def post_search_hook(self, i, matchdict):
    """
    Build the result row and swap the file type for its display name.

    The row produced by the parent class carries the file type in its
    URL form ('data.h5', 'head.h5' or 'fits'); it is replaced here with
    the friendlier name shown in the results table.
    """
    row = super().post_search_hook(i, matchdict)
    display_names = {
        'data.h5': 'HDF5 data',
        'head.h5': 'HDF5 header',
        'fits': 'FITS',
    }
    row['FileType'] = display_names[row['FileType']]
    return row

@classmethod
def register_values(cls):
    """
    Return the attr values supported by this client.

    Returns
    -------
    `dict`
        Maps each attr class to a list of ``(value, description)`` tuples.
    """
    level_description = (
        'EIS: The EIS client can only return level 1 data. Level 0 EIS data is available from the VSO.'
    )
    file_types = [
        ('data.h5', 'These files contain the actual intensity data in HDF5 format.'),
        ('head.h5', 'These files contain only the header metadata in HDF5 format.'),
        ('fits', 'These files contain both data and metadata in FITS format'),
    ]

    registered = {}
    registered[a.Instrument] = [('EIS', 'Extreme Ultraviolet Imaging Spectrometer')]
    registered[a.Physobs] = [('intensity', 'Spectrally resolved intensity in detector units')]
    registered[a.Source] = [('Hinode', 'The Hinode mission is a partnership between JAXA, NASA, and UKSA')]
    registered[a.Provider] = [('NRL', 'U.S. Naval Research Laboratory')]
    registered[a.Level] = [('1', level_description)]
    registered[FileType] = file_types
    return registered

@classmethod
def _attrs_module(cls):
    """
    Point Fido at the module holding the EIS-specific attrs.

    Returns
    -------
    `tuple` of `str`
        The name the attrs are registered under, followed by the dotted
        path of the module that defines them.
    """
    namespace = 'eispac'
    module_path = 'eispac.net.attrs'
    return namespace, module_path

@classmethod
def _can_handle_query(cls, *query):
"""
Check if this client can handle a given Fido query.
Returns
-------
bool
True if this client can handle the given query.
"""
required = {a.Time, a.Instrument, a.Source}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to make Source required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There could be another future instrument called EIS? 🤷

Copy link

@Cadair Cadair Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That doesn't mean you need to support source, if someone didn't provide source then they would get both. They could optionally use source to disambiguate.

(It just needs moving to optional).

optional = {a.Provider, a.Physobs, a.Level, FileType}
return cls.check_attr_types_in_query(query, required, optional)
61 changes: 61 additions & 0 deletions eispac/net/tests/test_eis_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import pytest
from sunpy.net import Fido, attrs as a
import eispac.net


@pytest.fixture
def eis_query():
    """Attrs for a one-hour Hinode/EIS level 1 intensity query with provider NRL."""
    return (
        a.Time('2022-03-29 22:21:00', '2022-03-29 23:21:00'),
        a.Instrument('EIS'),
        a.Physobs.intensity,
        a.Source('Hinode'),
        a.Provider('NRL'),
        a.Level('1'),
    )


@pytest.mark.remote_data
def test_search_all_types(eis_query):
    """A query without a FileType attr returns all three file types in one block."""
    expected_first_url = 'https://eis.nrl.navy.mil/level1/hdf5/2022/03/29/eis_20220329_222113.data.h5'
    response = Fido.search(*eis_query)
    assert len(response) == 1
    assert len(response[0]) == 3
    assert response[0, 0]['url'] == expected_first_url


@pytest.mark.remote_data
def test_search_fits_only(eis_query):
    """Restricting the query to FITS yields exactly one FITS file."""
    expected_url = 'https://eis.nrl.navy.mil/level1/fits/2022/03/29/eis_er_20220329_222113.fits'
    fits_only = a.eispac.FileType('FITS')
    response = Fido.search(*eis_query, fits_only)
    assert len(response) == 1
    assert len(response[0]) == 1
    assert response[0, 0]['url'] == expected_url


@pytest.mark.parametrize('file_type, file_url', [
    ('FITS', 'https://eis.nrl.navy.mil/level1/fits/2022/03/29/eis_er_20220329_222113.fits'),
    ('HDF5 data', 'https://eis.nrl.navy.mil/level1/hdf5/2022/03/29/eis_20220329_222113.data.h5'),
    ('HDF5 header', 'https://eis.nrl.navy.mil/level1/hdf5/2022/03/29/eis_20220329_222113.head.h5')
])
@pytest.mark.remote_data
def test_search_individual_filetypes(eis_query, file_type, file_url):
    """Each file type, requested on its own, returns a single row with the expected URL."""
    requested_type = a.eispac.FileType(file_type)
    response = Fido.search(*eis_query, requested_type)
    assert len(response) == 1
    assert len(response[0]) == 1
    only_row = response[0, 0]
    assert only_row['url'] == file_url
    assert only_row['FileType'] == file_type


@pytest.mark.remote_data
def test_combined_hdf5_search(eis_query):
    """OR-ing the two HDF5 file types gives two result blocks of one row each."""
    either_hdf5 = a.eispac.FileType('HDF5 data') | a.eispac.FileType('HDF5 header')
    response = Fido.search(*eis_query, either_hdf5)
    assert len(response) == 2
    for block_index in range(2):
        assert len(response[block_index]) == 1
    # Blocks come back in the same order as the OR-ed attrs.
    for block_index, display_name in enumerate(['HDF5 data', 'HDF5 header']):
        assert response[block_index, 0]['FileType'] == display_name


def test_registered_attrs():
    """The registered FileType values are exposed as attributes on the attr class."""
    for registered_name in ('fits', 'data_h5', 'head_h5'):
        assert hasattr(a.eispac.FileType, registered_name)
2 changes: 1 addition & 1 deletion eispac/util/rot_xy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def rot_xy(xcen, ycen, start_time, end_time):

Examples
--------
>>> new = rot_xy(0, 0, start_time='01-JAN-2021 00:00', end_time='01-JAN-2021 01:00')
>>> new = rot_xy(0, 0, start_time='2021-JAN-01 00:00', end_time='2021-JAN-01 01:00')
>>> print(new.Tx, new.Ty)
9.47188arcsec 0.0809565arcsec
"""
Expand Down
12 changes: 11 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@ install_requires =
matplotlib>=3.1
h5py>=2.9
astropy>=3.1
sunpy[map]>=2.1
sunpy[net,map]>=2.1
ndcube>=2.0.0
parfive>=1.5
python-dateutil>=2.8

[options.extras_require]
test =
pytest>=4.6.3
pytest-astropy
docs =
sphinx==4.0.2 # why is this pinned so strictly?
sphinx-automodapi>=0.13
Expand All @@ -47,3 +48,12 @@ docs =
[options.package_data]
eispac.data.test = *.h5
eispac.data.templates = *.template.*

[tool:pytest]
testpaths = "eispac"
norecursedirs = ".tox" "build" "docs[\/]_build" "docs[\/]generated" "*.egg-info" "examples" ".jupyter" ".history" "tools"
doctest_plus = enabled
text_file_format = rst
addopts = --doctest-rst
doctest_optionflags = NORMALIZE_WHITESPACE FLOAT_CMP ELLIPSIS
remote_data_strict = True