From b20f1e76e6d269bc9f4b7ceec1dd70d024dcb43f Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Mon, 24 Nov 2014 14:14:51 -0800 Subject: [PATCH] fix asDict() with nested Row() --- python/pyspark/sql.py | 2 +- python/pyspark/tests.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index abb284d1e3dd9..ae288471b0e51 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -1178,7 +1178,7 @@ class Row(tuple): def asDict(self): """ Return as a dict """ - return dict(zip(self.__FIELDS__, self)) + return dict((n, getattr(self, n)) for n in self.__FIELDS__) def __repr__(self): # call collect __repr__ for nested objects diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index a01bd8d415787..29bcd38908d10 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -803,7 +803,7 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): ReusedPySparkTestCase.tearDownClass() - shutil.rmtree(cls.tempdir.name) + shutil.rmtree(cls.tempdir.name, ignore_errors=True) def setUp(self): self.sqlCtx = SQLContext(self.sc) @@ -930,8 +930,9 @@ def test_convert_row_to_dict(self): rdd = self.sc.parallelize([row]) srdd = self.sqlCtx.inferSchema(rdd) srdd.registerTempTable("test") - row = self.sqlCtx.sql("select l[0].a AS la from test").first() - self.assertEqual(1, row.asDict()["la"]) + row = self.sqlCtx.sql("select l, d from test").first() + self.assertEqual(1, row.asDict()["l"][0].a) + self.assertEqual(1.0, row.asDict()['d']['key'].c) def test_infer_schema_with_udt(self): from pyspark.tests import ExamplePoint, ExamplePointUDT