Skip to content

Commit

Permalink
Merge pull request #14723 from gpsgibb/table-io-null-fix
Browse files Browse the repository at this point in the history
Table io null fix
  • Loading branch information
saimn committed Nov 3, 2023
2 parents f1baf3c + 6be5cac commit be40cc4
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 1 deletion.
13 changes: 12 additions & 1 deletion astropy/io/fits/connect.py
Expand Up @@ -265,21 +265,32 @@ def read_table_fits(
# string, empty strings.
# Since Multi-element columns with dtypes such as '2f8' have a subdtype,
# we should look up the type of column on that.
# Also propagate TNULL (for ints) or the FITS default null value for
# floats and strings to the column's fill_value to ensure round trips
# preserve null values.
masked = mask = False
fill_value = None
coltype = col.dtype.subdtype[0].type if col.dtype.subdtype else col.dtype.type
if col.null is not None:
mask = data[col.name] == col.null
# Return a MaskedColumn even if no elements are masked so
# we roundtrip better.
masked = True
fill_value = col.null
elif mask_invalid and issubclass(coltype, np.inexact):
mask = np.isnan(data[col.name])
fill_value = np.nan
elif mask_invalid and issubclass(coltype, np.character):
mask = col.array == b""
fill_value = b""

if masked or np.any(mask):
column = MaskedColumn(
data=data[col.name], name=col.name, mask=mask, copy=False
data=data[col.name],
name=col.name,
mask=mask,
copy=False,
fill_value=fill_value,
)
else:
column = Column(data=data[col.name], name=col.name, copy=False)
Expand Down
23 changes: 23 additions & 0 deletions astropy/io/fits/tests/test_connect.py
Expand Up @@ -1076,3 +1076,26 @@ def test_keep_masked_state_integer_columns(tmp_path):
assert not isinstance(tr["a"], MaskedColumn)
assert not isinstance(tr["b"], MaskedColumn)
assert isinstance(tr["c"], MaskedColumn)


def test_null_propagation_in_table_read(tmp_path):
"""Checks that integer columns with a TNULL value set (e.g. masked columns)
have their TNULL value propagated when being read in by Table.read"""

# Could be anything except for 999999, which is the "default" fill_value
# for masked int arrays
NULL_VALUE = -1

output_filename = tmp_path / "null_table.fits"

data = np.asarray([1, 2, NULL_VALUE, 4], dtype=np.int32)

# Create table with BinTableHDU, with integer column containing a custom null
c = fits.Column(name="a", array=data, null=NULL_VALUE, format="J")
hdu = BinTableHDU.from_columns([c])
hdu.writeto(output_filename)

# Read the table in with Table.read, and ensure the column's fill_value is
# equal to NULL_VALUE
t = Table.read(output_filename)
assert t["a"].fill_value == NULL_VALUE
6 changes: 6 additions & 0 deletions astropy/table/table.py
Expand Up @@ -678,6 +678,12 @@ def as_array(self, keep_byteorder=False, names=None):
if masked and has_info_class(col, MixinInfo) and hasattr(col, "mask"):
data[col.info.name].mask = col.mask

# Propagate the fill_value from the table column to the output array.
# If this is not done, then the output array will use numpy.ma's default
# fill values (999999 for ints, 1E20 for floats, "N/A" for strings)
if masked and hasattr(col, "fill_value"):
data[col.info.name].fill_value = col.fill_value

return data

def __init__(
Expand Down
59 changes: 59 additions & 0 deletions astropy/table/tests/test_table.py
Expand Up @@ -3289,3 +3289,62 @@ def test_add_list_order():
array = np.empty((20, 1))
t.add_columns(array, names=names)
assert t.colnames == names


def test_table_write_preserves_nulls(tmp_path):
"""Ensures that upon writing a table, the fill_value attribute of a
masked (integer) column is correctly propagated into the TNULL parameter
in the FITS header"""

# Could be anything except for 999999, which is the "default" fill_value
# for masked int arrays
NULL_VALUE = -1

# Create table with an integer MaskedColumn with custom fill_value
c1 = MaskedColumn(
name="a",
data=np.asarray([1, 2, 3], dtype=np.int32),
mask=[True, False, True],
fill_value=NULL_VALUE,
)
t = Table([c1])

table_filename = tmp_path / "nultable.fits"

# Write the table out with Table.write()
t.write(table_filename)

# Open the output file, and check the TNULL parameter is NULL_VALUE
with fits.open(table_filename) as hdul:
header = hdul[1].header

assert header["TNULL1"] == NULL_VALUE


def test_as_array_preserve_fill_value():
"""Ensures that Table.as_array propagates a MaskedColumn's fill_value to
the output array"""

INT_FILL = 123
FLOAT_FILL = 123.0
STR_FILL = "xyz"
CMPLX_FILL = complex(3.14, 2.71)

# set up a table with some columns with different data types
c_int = MaskedColumn(name="int", data=[1, 2, 3], fill_value=INT_FILL)
c_float = MaskedColumn(name="float", data=[1.0, 2.0, 3.0], fill_value=FLOAT_FILL)
c_str = MaskedColumn(name="str", data=["abc", "def", "ghi"], fill_value=STR_FILL)
c_cmplx = MaskedColumn(
name="cmplx",
data=[complex(1, 0), complex(0, 1), complex(1, 1)],
fill_value=CMPLX_FILL,
)

t = Table([c_int, c_float, c_str, c_cmplx])

tn = t.as_array()

assert tn["int"].fill_value == INT_FILL
assert tn["float"].fill_value == FLOAT_FILL
assert tn["str"].fill_value == STR_FILL
assert tn["cmplx"].fill_value == CMPLX_FILL
2 changes: 2 additions & 0 deletions docs/changes/io.fits/14723.bugfix.rst
@@ -0,0 +1,2 @@
Reading a table from FITS now respects the TNULL property of a column, passing
it into the column's ``fill_value``.
1 change: 1 addition & 0 deletions docs/changes/table/14723.bugfix.rst
@@ -0,0 +1 @@
``Table.as_array`` now respects the ``fill_value`` property of masked columns.

0 comments on commit be40cc4

Please sign in to comment.