Skip to content

Commit

Permalink
Handle and test fixed-size binary, list
Browse files Browse the repository at this point in the history
  • Loading branch information
ianmcook committed May 16, 2024
1 parent 713c3f7 commit c43f26c
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
4 changes: 4 additions & 0 deletions python/pyspark/sql/pandas/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def from_arrow_type(at: "pa.DataType", prefer_timestamp_ntz: bool = False) -> Da
spark_type = StringType()
elif types.is_binary(at):
spark_type = BinaryType()
elif types.is_fixed_size_binary(at):
spark_type = BinaryType()
elif types.is_large_binary(at):
spark_type = BinaryType()
elif types.is_date32(at):
Expand All @@ -219,6 +221,8 @@ def from_arrow_type(at: "pa.DataType", prefer_timestamp_ntz: bool = False) -> Da
spark_type = DayTimeIntervalType()
elif types.is_list(at):
spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz))
elif types.is_fixed_size_list(at):
spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz))
elif types.is_large_list(at):
spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz))
elif types.is_map(at):
Expand Down
6 changes: 6 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,12 @@ def test_createDataFrame_arrow_large_list(self):
def test_createDataFrame_arrow_large_list_int64_offset(self):
super().test_createDataFrame_arrow_large_list_int64_offset()

# Parity wrapper: delegates to the inherited test of the same name via super().
def test_createDataFrame_arrow_fixed_size_binary(self):
super().test_createDataFrame_arrow_fixed_size_binary()

# Parity wrapper: delegates to the inherited test of the same name via super().
def test_createDataFrame_arrow_fixed_size_list(self):
super().test_createDataFrame_arrow_fixed_size_list()


if __name__ == "__main__":
from pyspark.sql.tests.connect.test_parity_arrow import * # noqa: F401
Expand Down
12 changes: 12 additions & 0 deletions python/pyspark/sql/tests/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1568,6 +1568,18 @@ def test_createDataFrame_arrow_large_list_int64_offset(self):
with self.assertRaises(Exception):
self.spark.createDataFrame(t)

def test_createDataFrame_arrow_fixed_size_binary(self):
    # A fixed-size binary Arrow column (declared via pa.binary(1)) must be
    # accepted by createDataFrame and exposed with a BinaryType column type.
    fixed_binary = pa.array(["a"] * 5, type=pa.binary(1))
    arrow_table = pa.table([fixed_binary], ["fsb"])
    spark_df = self.spark.createDataFrame(arrow_table)
    fsb_field = spark_df.schema["fsb"]
    self.assertIsInstance(fsb_field.dataType, BinaryType)

def test_createDataFrame_arrow_fixed_size_list(self):
    # A fixed-size list Arrow column (declared via pa.list_(pa.int32(), 2)) must
    # be accepted by createDataFrame and exposed with an ArrayType column type.
    pairs = pa.array([[-1, 3]] * 5, type=pa.list_(pa.int32(), 2))
    arrow_table = pa.table([pairs], ["fsl"])
    spark_df = self.spark.createDataFrame(arrow_table)
    fsl_field = spark_df.schema["fsl"]
    self.assertIsInstance(fsl_field.dataType, ArrayType)


@unittest.skipIf(
not have_pandas or not have_pyarrow,
Expand Down

0 comments on commit c43f26c

Please sign in to comment.