Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[io.ascii.cds] Fix reading of multi-line CDS descriptions where the continued line starts with a number #15617

Merged
merged 9 commits into from Dec 12, 2023
29 changes: 25 additions & 4 deletions astropy/io/ascii/cds.py
Expand Up @@ -15,7 +15,7 @@
import re
from contextlib import suppress

from astropy.units import Unit
from astropy.units import Unit, UnitsWarning, UnrecognizedUnit

from . import core, fixedwidth

Expand Down Expand Up @@ -130,11 +130,30 @@
if unit == "---":
col.unit = None # "---" is the marker for no unit in CDS/MRT table
else:
col.unit = Unit(unit, format="cds", parse_strict="warn")
try:
col.unit = Unit(unit, format="cds", parse_strict="warn")
except UnitsWarning:

Check warning on line 135 in astropy/io/ascii/cds.py

View check run for this annotation

Codecov / codecov/patch

astropy/io/ascii/cds.py#L135

Added line #L135 was not covered by tests
# catch when warnings are turned into errors so we can check
# whether this line is likely a multi-line description (see below)
col.unit = UnrecognizedUnit(unit)

Check warning on line 138 in astropy/io/ascii/cds.py

View check run for this annotation

Codecov / codecov/patch

astropy/io/ascii/cds.py#L138

Added line #L138 was not covered by tests
col.description = (match.group("descr") or "").strip()
col.raw_type = match.group("format")
col.type = self.get_col_type(col)

try:
col.type = self.get_col_type(col)
except ValueError:

Check warning on line 143 in astropy/io/ascii/cds.py

View check run for this annotation

Codecov / codecov/patch

astropy/io/ascii/cds.py#L143

Added line #L143 was not covered by tests
# If parsing the format fails and the unit is unrecognized,
# then this line is likely a continuation of the previous col's
# description that happens to start with a number
if isinstance(col.unit, UnrecognizedUnit):
if len(cols[-1].description) > 0:
cols[-1].description += " "
cols[-1].description += line.strip()
continue

Check warning on line 151 in astropy/io/ascii/cds.py

View check run for this annotation

Codecov / codecov/patch

astropy/io/ascii/cds.py#L147-L151

Added lines #L147 - L151 were not covered by tests
else:
if col.unit is not None:
# Because we may have ignored a UnitsWarning turned into an error
# we do this again so it can be raised again if it is a real error
col.unit = Unit(unit, format="cds", parse_strict="warn")
match = re.match(
# Matches limits specifier (eg []) that may or may not be
# present
Expand Down Expand Up @@ -173,6 +192,8 @@
cols.append(col)
else: # could be a continuation of the previous col's description
if cols:
if len(cols[-1].description) > 0:
cols[-1].description += " "
cols[-1].description += line.strip()
else:
raise ValueError(f'Line "{line}" not parsable as CDS header')
Expand Down
3 changes: 2 additions & 1 deletion astropy/io/ascii/tests/data/cds/description/ReadMe
Expand Up @@ -54,7 +54,8 @@ Byte-by-byte Description of file: table.dat
24 I1 --- ion ?=0
- Ionization stage (1 for neutral element)
26- 30 F5.2 eV chiEx Excitation potential
32- 37 F6.2 --- loggf Logarithm of the oscillator strength
32- 37 F6.2 --- loggf log10 of the gf value - logarithm base
10 of stat. weight times oscillator strength
39- 43 F5.1 0.1pm EW ?=-9.9 Equivalent width (in mA)
46- 49 F4.1 0.1pm e_EW ?=-9.9 rms uncertainty on EW
51- 56 F6.3 --- Q ?=-9.999 DAOSPEC quality parameter Q
Expand Down
11 changes: 8 additions & 3 deletions astropy/io/ascii/tests/test_cds_header_from_readme.py
Expand Up @@ -37,14 +37,19 @@ def test_description():
assert_equal(len(table), 2)
assert_equal(table["Cluster"].description, "Cluster name")
assert_equal(table["Star"].description, "")
assert_equal(table["Wave"].description, "wave? Wavelength in Angstroms")
assert_equal(table["Wave"].description, "wave ? Wavelength in Angstroms")
jobovy marked this conversation as resolved.
Show resolved Hide resolved
assert_equal(table["El"].description, "a")
assert_equal(
table["ion"].description, "- Ionization stage (1 for neutral element)"
)
assert_equal(
table["loggf"].description,
"log10 of the gf value - logarithm base 10 of stat. weight times "
"oscillator strength",
)
assert_equal(table["EW"].description, "Equivalent width (in mA)")
assert_equal(
table["Q"].description, "DAOSPEC quality parameter Q(large values are bad)"
table["Q"].description, "DAOSPEC quality parameter Q (large values are bad)"
)


Expand Down Expand Up @@ -229,7 +234,7 @@ def test_cds_no_whitespace():
assert_equal(r.header.cols[7].null, "-9.9")
assert_equal(
r.header.cols[10].description,
"DAOSPEC quality parameter Q(large values are bad)",
"DAOSPEC quality parameter Q (large values are bad)",
)
assert_equal(r.header.cols[10].null, "-9.999")

Expand Down
1 change: 1 addition & 0 deletions docs/changes/io.ascii/15617.bugfix.rst
@@ -0,0 +1 @@
Fixed reading of CDS header files with multi-line descriptions in which a continuation line starts with a number.