Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Table io null fix #14723

Merged
merged 11 commits into from Nov 3, 2023
13 changes: 12 additions & 1 deletion astropy/io/fits/connect.py
Expand Up @@ -265,21 +265,32 @@ def read_table_fits(
# string, empty strings.
# Since Multi-element columns with dtypes such as '2f8' have a subdtype,
# we should look up the type of column on that.
# Also propagate TNULL (for ints) or the FITS default null value for
# floats and strings to the column's fill_value to ensure round trips
# preserve null values.
masked = mask = False
fill_value = None
coltype = col.dtype.subdtype[0].type if col.dtype.subdtype else col.dtype.type
if col.null is not None:
saimn marked this conversation as resolved.
Show resolved Hide resolved
mask = data[col.name] == col.null
# Return a MaskedColumn even if no elements are masked so
# we roundtrip better.
masked = True
fill_value = col.null
elif mask_invalid and issubclass(coltype, np.inexact):
mask = np.isnan(data[col.name])
fill_value = np.NaN
taldcroft marked this conversation as resolved.
Show resolved Hide resolved
elif mask_invalid and issubclass(coltype, np.character):
mask = col.array == b""
fill_value = b""

if masked or np.any(mask):
column = MaskedColumn(
data=data[col.name], name=col.name, mask=mask, copy=False
data=data[col.name],
name=col.name,
mask=mask,
copy=False,
fill_value=fill_value,
)
else:
column = Column(data=data[col.name], name=col.name, copy=False)
Expand Down
23 changes: 23 additions & 0 deletions astropy/io/fits/tests/test_connect.py
Expand Up @@ -1076,3 +1076,26 @@ def test_keep_masked_state_integer_columns(tmp_path):
assert not isinstance(tr["a"], MaskedColumn)
assert not isinstance(tr["b"], MaskedColumn)
assert isinstance(tr["c"], MaskedColumn)


def test_null_propagation_in_table_read(tmp_path):
"""Checks that integer columns with a TNULL value set (e.g. masked columns)
have their TNULL value propagated when being read in by Table.read"""

# Could be anything except for 999999, which is the "default" fill_value
# for masked int arrays
NULL_VALUE = -1

output_filename = tmp_path / "null_table.fits"

data = np.asarray([1, 2, NULL_VALUE, 4], dtype=np.int32)

# Create table with BinTableHDU, with integer column containing a custom null
c = fits.Column(name="a", array=data, null=NULL_VALUE, format="J")
hdu = BinTableHDU.from_columns([c])
hdu.writeto(output_filename)

# Read the table in with Table.read, and ensure the column's fill_value is
# equal to NULL_VALUE
t = Table.read(output_filename)
assert t["a"].fill_value == NULL_VALUE
6 changes: 6 additions & 0 deletions astropy/table/table.py
Expand Up @@ -678,6 +678,12 @@ def as_array(self, keep_byteorder=False, names=None):
if masked and has_info_class(col, MixinInfo) and hasattr(col, "mask"):
data[col.info.name].mask = col.mask

# Propagate the fill_value from the table column to the output array.
# If this is not done, then the output array will use numpy.ma's default
# fill values (999999 for ints, 1E20 for floats, "N/A" for strings)
if masked and hasattr(col, "fill_value"):
data[col.info.name].fill_value = col.fill_value

return data

def __init__(
Expand Down
59 changes: 59 additions & 0 deletions astropy/table/tests/test_table.py
Expand Up @@ -3289,3 +3289,62 @@ def test_add_list_order():
array = np.empty((20, 1))
t.add_columns(array, names=names)
assert t.colnames == names


def test_table_write_preserves_nulls(tmp_path):
"""Ensures that upon writing a table, the fill_value attribute of a
masked (integer) column is correctly propagated into the TNULL parameter
in the FITS header"""

# Could be anything except for 999999, which is the "default" fill_value
# for masked int arrays
NULL_VALUE = -1

# Create table with an integer MaskedColumn with custom fill_value
c1 = MaskedColumn(
name="a",
data=np.asarray([1, 2, 3], dtype=np.int32),
mask=[True, False, True],
fill_value=NULL_VALUE,
)
t = Table([c1])

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a specific unit test of the Table.as_array() functionality here by doing tn = t.as_array() and confirm the correct fill_value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done (see aab7ab6)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems this commit was lost in one of the rebase ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rebased and added the commit.

table_filename = tmp_path / "nultable.fits"

# Write the table out with Table.write()
t.write(table_filename)

# Open the output file, and check the TNULL parameter is NULL_VALUE
with fits.open(table_filename) as hdul:
header = hdul[1].header

assert header["TNULL1"] == NULL_VALUE


def test_as_array_preserve_fill_value():
"""Ensures that Table.as_array propagates a MaskedColumn's fill_value to
the output array"""

INT_FILL = 123
FLOAT_FILL = 123.0
STR_FILL = "xyz"
CMPLX_FILL = complex(3.14, 2.71)

# set up a table with some columns with different data types
c_int = MaskedColumn(name="int", data=[1, 2, 3], fill_value=INT_FILL)
c_float = MaskedColumn(name="float", data=[1.0, 2.0, 3.0], fill_value=FLOAT_FILL)
c_str = MaskedColumn(name="str", data=["abc", "def", "ghi"], fill_value=STR_FILL)
c_cmplx = MaskedColumn(
name="cmplx",
data=[complex(1, 0), complex(0, 1), complex(1, 1)],
fill_value=CMPLX_FILL,
)

t = Table([c_int, c_float, c_str, c_cmplx])

tn = t.as_array()

assert tn["int"].fill_value == INT_FILL
assert tn["float"].fill_value == FLOAT_FILL
assert tn["str"].fill_value == STR_FILL
assert tn["cmplx"].fill_value == CMPLX_FILL
1 change: 1 addition & 0 deletions docs/changes/io.fits/14723.bugfix.rst
@@ -0,0 +1 @@
Reading a table from FITS now respects the TNULL property of a column, passing it into the column's `fill_value`
1 change: 1 addition & 0 deletions docs/changes/table/14723.bugfix.rst
@@ -0,0 +1 @@
`Table.as_array` now respects the `fill_value` property of masked columns
gpsgibb marked this conversation as resolved.
Show resolved Hide resolved