Skip to content

Commit

Permalink
[SPARK-10305] [SQL] fix create DataFrame from Python class
Browse files Browse the repository at this point in the history
cc jkbradley

Author: Davies Liu <davies@databricks.com>

Closes #8470 from davies/fix_create_df.

(cherry picked from commit d41d6c4)
Signed-off-by: Davies Liu <davies.liu@gmail.com>
  • Loading branch information
Davies Liu authored and davies committed Aug 26, 2015
1 parent efbd7af commit 0bdb800
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
12 changes: 12 additions & 0 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ class PythonOnlyPoint(ExamplePoint):
__UDT__ = PythonOnlyUDT()


class MyObject(object):
    """Plain Python object exposing ``key`` and ``value`` attributes.

    Used by the tests as an input record that is neither a Row, dict,
    tuple nor list: its data lives only in its instance attributes.
    """

    def __init__(self, key, value):
        # Stored as ordinary instance attributes, so both end up in the
        # instance ``__dict__``.
        self.key = key
        self.value = value


class DataTypeTests(unittest.TestCase):
# regression test for SPARK-6055
def test_data_type_eq(self):
Expand Down Expand Up @@ -383,6 +389,12 @@ def test_infer_nested_schema(self):
df = self.sqlCtx.inferSchema(rdd)
self.assertEquals(Row(field1=1, field2=u'row1'), df.first())

def test_create_dataframe_from_objects(self):
    # regression test for SPARK-10305: createDataFrame must accept plain
    # Python objects as input records
    data = [MyObject(1, "1"), MyObject(2, "2")]
    df = self.sqlCtx.createDataFrame(data)
    # column names and types are expected to match the objects' attributes
    self.assertEqual(df.dtypes, [("key", "bigint"), ("value", "string")])
    self.assertEqual(df.first(), Row(key=1, value="1"))

def test_select_null_literal(self):
df = self.sqlCtx.sql("select null as col")
self.assertEquals(Row(col=None), df.first())
Expand Down
6 changes: 6 additions & 0 deletions python/pyspark/sql/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,13 +537,19 @@ def toInternal(self, obj):
return tuple(f.toInternal(obj.get(n)) for n, f in zip(self.names, self.fields))
elif isinstance(obj, (tuple, list)):
return tuple(f.toInternal(v) for f, v in zip(self.fields, obj))
elif hasattr(obj, "__dict__"):
d = obj.__dict__
return tuple(f.toInternal(d.get(n)) for n, f in zip(self.names, self.fields))
else:
raise ValueError("Unexpected tuple %r with StructType" % obj)
else:
if isinstance(obj, dict):
return tuple(obj.get(n) for n in self.names)
elif isinstance(obj, (list, tuple)):
return tuple(obj)
elif hasattr(obj, "__dict__"):
d = obj.__dict__
return tuple(d.get(n) for n in self.names)
else:
raise ValueError("Unexpected tuple %r with StructType" % obj)

Expand Down

0 comments on commit 0bdb800

Please sign in to comment.