Skip to content

Commit

Permalink
SPARK-24915: Adding handling for RDD-style Rows.
Browse files Browse the repository at this point in the history
  • Loading branch information
jhereth committed Oct 15, 2019
1 parent 431409d commit a52de2e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
11 changes: 11 additions & 0 deletions python/pyspark/sql/tests/test_types.py
Expand Up @@ -250,6 +250,17 @@ def test_create_dataframe_from_rows_with_nested_row(self):
('field1', 'string')])
self.assertEqual(df.first().asDict(), row.asDict())

def test_create_dataframe_from_tuple_rows(self):
    """createDataFrame must accept positional (tuple-style) Rows — Rows built
    without field names — when an explicit schema supplies the names
    (regression test for SPARK-24915)."""
    rows = [
        Row('Alice', datetime.date(2014, 5, 26)),
        Row('Bob', datetime.date(2016, 7, 26)),
    ]
    fields = [
        StructField("name", StringType(), False),
        StructField("join_date", DateType(), False),
    ]
    df = self.spark.createDataFrame(rows, schema=StructType(fields))
    # Schema names/types come from the explicit schema, not the Rows.
    self.assertEqual(df.dtypes, [("name", "string"), ("join_date", "date")])
    # Round-trip: the first row survives conversion unchanged.
    self.assertEqual(df.first(), Row('Alice', datetime.date(2014, 5, 26)))

def test_apply_schema(self):
from datetime import date, datetime
rdd = self.sc.parallelize([(127, -128, -32768, 32767, 2147483647, 1.0,
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/types.py
Expand Up @@ -599,7 +599,7 @@ def toInternal(self, obj):
if isinstance(obj, dict):
return tuple(f.toInternal(obj.get(n)) if c else obj.get(n)
for n, f, c in zip(self.names, self.fields, self._needConversion))
elif isinstance(obj, Row):
elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False):
return self.toInternal(obj.asDict())
elif isinstance(obj, (tuple, list)):
return tuple(f.toInternal(v) if c else v
Expand Down

0 comments on commit a52de2e

Please sign in to comment.