[SPARK-46255][PYTHON][CONNECT] Support complex type -> string conversion
### What changes were proposed in this pull request?
Support complex type -> string conversion

### Why are the changes needed?
To support `list -> str` conversion.

### Does this PR introduce _any_ user-facing change?
Yes, complex values (e.g. `list`) can now be converted to `str` in Spark Connect.

### How was this patch tested?
CI.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#44171 from zhengruifeng/py_connect_str_conv.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
zhengruifeng authored and dbatomic committed Dec 11, 2023
1 parent 4dc3c7b commit da83a8c
Showing 2 changed files with 13 additions and 16 deletions.
16 changes: 0 additions & 16 deletions python/pyspark/sql/connect/conversion.py
@@ -222,22 +222,6 @@ def convert_string(value: Any) -> Any:
             if value is None:
                 return None
             else:
-                # only atomic types are supported
-                assert isinstance(
-                    value,
-                    (
-                        bool,
-                        int,
-                        float,
-                        str,
-                        bytes,
-                        bytearray,
-                        decimal.Decimal,
-                        datetime.date,
-                        datetime.datetime,
-                        datetime.timedelta,
-                    ),
-                )
                 if isinstance(value, bool):
                     # To match the PySpark which convert bool to string in
                     # the JVM side (python.EvaluatePython.makeFromJava)
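With the atomic-type assertion removed, non-atomic values fall through to the generic `str()` path. A minimal standalone sketch of the resulting behavior (simplified; the real `convert_string` lives in `pyspark.sql.connect.conversion` and handles more cases):

```python
def convert_string(value):
    """Simplified sketch: stringify any non-None value for a StringType column.

    After this change, complex values such as lists are no longer rejected
    by an atomic-type assertion; they are converted with str() like any
    other value.
    """
    if value is None:
        return None
    if isinstance(value, bool):
        # Match the JVM side (python.EvaluatePython.makeFromJava),
        # which renders booleans as lowercase "true"/"false".
        return str(value).lower()
    return str(value)
```

For example, `convert_string([123])` yields `"[123]"`, which is the value the new test below expects in `Row(name="[123]", income=120)`.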
13 changes: 13 additions & 0 deletions python/pyspark/sql/tests/test_types.py
@@ -507,6 +507,19 @@ def test_convert_row_to_dict(self):
         self.assertEqual(1, row.asDict()["l"][0].a)
         self.assertEqual(1.0, row.asDict()["d"]["key"].c)
 
+    def test_convert_list_to_str(self):
+        data = [[[123], 120]]
+        schema = StructType(
+            [
+                StructField("name", StringType(), True),
+                StructField("income", LongType(), True),
+            ]
+        )
+        df = self.spark.createDataFrame(data, schema)
+        self.assertEqual(df.schema, schema)
+        self.assertEqual(df.count(), 1)
+        self.assertEqual(df.head(), Row(name="[123]", income=120))
+
     def test_udt(self):
         from pyspark.sql.types import _parse_datatype_json_string, _infer_type, _make_type_verifier
 
