Skip to content

Commit

Permalink
SPARK-24915: Adding handling for RDD-style Rows.
Browse files Browse the repository at this point in the history
  • Loading branch information
jhereth committed Oct 15, 2019
1 parent 431409d commit a52de2e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
11 changes: 11 additions & 0 deletions python/pyspark/sql/tests/test_types.py
Expand Up @@ -250,6 +250,17 @@ def test_create_dataframe_from_rows_with_nested_row(self):
('field1', 'string')])
self.assertEqual(df.first().asDict(), row.asDict())

def test_create_dataframe_from_tuple_rows(self):
    """createDataFrame must accept positional (tuple-style) Rows — Rows built
    without field names — when an explicit schema supplies the names
    (regression test for SPARK-24915)."""
    rows = [
        Row('Alice', datetime.date(2014, 5, 26)),
        Row('Bob', datetime.date(2016, 7, 26)),
    ]
    fields = [
        StructField("name", StringType(), False),
        StructField("join_date", DateType(), False),
    ]
    df = self.spark.createDataFrame(rows, schema=StructType(fields))
    # Schema names/types come from the explicit schema, not the Rows.
    self.assertEqual(df.dtypes, [("name", "string"), ("join_date", "date")])
    # Round-trip: the first row survives conversion unchanged.
    self.assertEqual(df.first(), Row('Alice', datetime.date(2014, 5, 26)))

def test_apply_schema(self):
from datetime import date, datetime
rdd = self.sc.parallelize([(127, -128, -32768, 32767, 2147483647, 1.0,
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/types.py
Expand Up @@ -599,7 +599,7 @@ def toInternal(self, obj):
if isinstance(obj, dict):
return tuple(f.toInternal(obj.get(n)) if c else obj.get(n)
for n, f, c in zip(self.names, self.fields, self._needConversion))
elif isinstance(obj, Row):
elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False):
return self.toInternal(obj.asDict())
elif isinstance(obj, (tuple, list)):
return tuple(f.toInternal(v) if c else v
Expand Down

0 comments on commit a52de2e

Please sign in to comment.