Add support for persisting big-endian arrays to Parquet by byte-swapping on write. #14373

Merged: 3 commits, Apr 18, 2023
14 changes: 11 additions & 3 deletions astropy/io/misc/parquet.py
@@ -440,16 +440,24 @@ def write_table_parquet(table, output, overwrite=False):
     arrays = []
     for name in encode_table.dtype.names:
         dt = encode_table.dtype[name]
+        # Parquet must be stored little-endian. When we use astype(..., copy=False)
+        # we get a very fast conversion when the dtype is unchanged, and only
+        # incur a cost when we need to do a byte-swap operation.
+        dt_new = dt.newbyteorder("<")
         if dt.type == np.object_:
             # Turn the column into a list of numpy arrays.
-            val = [row for row in encode_table[name]]
+            val = [row.astype(dt_new, copy=False) for row in encode_table[name]]
         elif len(dt.shape) > 0:
             if len(encode_table) > 0:
-                val = np.split(encode_table[name].ravel(), len(encode_table))
+                val = np.split(
+                    encode_table[name].ravel().astype(dt_new.base, copy=False),
+                    len(encode_table),
+                )
             else:
                 val = []
         else:
-            val = encode_table[name]
+            val = encode_table[name].astype(dt_new, copy=False)
 
         arrays.append(pa.array(val, type=schema.field(name).type))
 
     # Create a pyarrow table from the list of arrays and the schema
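The core of this change is the astype(dt.newbyteorder("<"), copy=False) pattern in the hunk above. Below is a minimal illustration of the NumPy behavior it relies on (not part of the diff; array names are arbitrary): when the data are already little-endian the cast reuses the existing buffer and is essentially free, while big-endian input is byte-swapped into a new little-endian array that pyarrow can ingest.

import numpy as np

# Illustration only: how astype(..., copy=False) behaves for the two cases
# handled in write_table_parquet above.
le = np.arange(4, dtype="<i4")          # already little-endian
be = np.arange(4, dtype=">i4")          # big-endian, e.g. as read from FITS

target = np.dtype(">i4").newbyteorder("<")   # always "<i4"

# No byte swap needed: the original buffer is reused, so the cast is free.
assert np.shares_memory(le.astype(target, copy=False), le)

# Byte swap needed: a new little-endian array is created; values are preserved.
swapped = be.astype(target, copy=False)
assert swapped.dtype == np.dtype("<i4")
assert not np.shares_memory(swapped, be)
assert np.array_equal(swapped, be)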
68 changes: 68 additions & 0 deletions astropy/io/misc/tests/test_parquet.py
@@ -43,6 +43,13 @@
     "U3",
 ]
 
+BIGENDIAN_DTYPES = [
+    np.dtype(">i4"),
+    np.dtype(">i8"),
+    np.dtype(">f4"),
+    np.dtype(">f8"),
+]
+
 
 def _default_values(dtype):
     if dtype == np.bool_:
@@ -218,6 +225,25 @@ def test_preserve_single_dtypes(tmp_path, dtype):
     assert t2["a"].dtype == dtype
 
 
+@pytest.mark.parametrize("dtype", BIGENDIAN_DTYPES)
+def test_preserve_single_bigendian_dtypes(tmp_path, dtype):
+    """Test that round-tripping a single big-endian column preserves data."""
+
+    test_file = tmp_path / "test.parquet"
+
+    values = _default_values(dtype)
+
+    t1 = Table()
+    t1.add_column(Column(name="a", data=np.array(values, dtype=dtype)))
+    t1.write(test_file)
+
+    t2 = Table.read(test_file)
+
+    assert np.all(t2["a"] == values)
+    # The parquet serialization will turn all arrays into little-endian.
+    assert t2["a"].dtype == dtype.newbyteorder("<")
+
+
 @pytest.mark.parametrize("dtype", ALL_DTYPES)
 def test_preserve_single_array_dtypes(tmp_path, dtype):
     """Test that round-tripping a single array column preserves datatypes."""
@@ -237,6 +263,25 @@ def test_preserve_single_array_dtypes(tmp_path, dtype):
     assert t2["a"].dtype == dtype
 
 
+@pytest.mark.parametrize("dtype", BIGENDIAN_DTYPES)
+def test_preserve_single_bigendian_array_dtypes(tmp_path, dtype):
+    """Test that round-tripping a single array column (big-endian) preserves data."""
+
+    test_file = tmp_path / "test.parquet"
+
+    values = _default_array_values(dtype)
+
+    t1 = Table()
+    t1.add_column(Column(name="a", data=np.array(values, dtype=dtype)))
+    t1.write(test_file)
+
+    t2 = Table.read(test_file)
+
+    assert np.all(t2["a"] == t1["a"])
+    assert np.all(t2["a"].shape == np.array(values).shape)
+    assert t2["a"].dtype == dtype.newbyteorder("<")
+
+
 @pytest.mark.parametrize("dtype", ALL_DTYPES)
 def test_preserve_single_var_length_array_dtypes(tmp_path, dtype):
     """
@@ -260,6 +305,29 @@ def test_preserve_single_var_length_array_dtypes(tmp_path, dtype):
     assert row1.dtype == row2.dtype
 
 
+@pytest.mark.parametrize("dtype", BIGENDIAN_DTYPES)
+def test_preserve_single_bigendian_var_length_array_dtypes(tmp_path, dtype):
+    """
+    Test that round-tripping a single big-endian variable length array column
+    preserves datatypes.
+    """
+
+    test_file = tmp_path / "test.parquet"
+
+    values = _default_var_length_array_values(dtype)
+
+    t1 = Table()
+    data = np.array([np.array(val, dtype=dtype) for val in values], dtype=np.object_)
+    t1.add_column(Column(name="a", data=data))
+    t1.write(test_file)
+
+    t2 = Table.read(test_file)
+
+    for row1, row2 in zip(t1["a"], t2["a"]):
+        assert np.all(row1 == row2)
+        assert row1.dtype.newbyteorder(">") == row2.dtype.newbyteorder(">")
+
+
 def test_preserve_all_dtypes(tmp_path):
     """Test that round-tripping preserves a table with all the datatypes."""
 
1 change: 1 addition & 0 deletions docs/changes/io.misc/14373.bugfix.rst
@@ -0,0 +1 @@
+Columns with big-endian byte ordering (such as those read in from a FITS table) can now be serialized with Parquet.
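For context, here is a minimal end-to-end sketch of what the fix enables, mirroring the new tests (the file name and values are arbitrary, and pyarrow must be installed): a big-endian column now writes successfully and reads back as little-endian.

import numpy as np
from astropy.table import Column, Table

# A big-endian column (as commonly produced by reading a FITS table)
# is byte-swapped on write and round-trips through Parquet.
t1 = Table()
t1.add_column(Column(name="a", data=np.array([1, 2, 3], dtype=">i4")))
t1.write("bigendian_example.parquet", format="parquet", overwrite=True)

t2 = Table.read("bigendian_example.parquet", format="parquet")
assert np.all(t2["a"] == t1["a"])
assert t2["a"].dtype == np.dtype("<i4")  # serialized little-endian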