diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index a2c76ad218069..1c9998c0ef4ff 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -808,7 +808,7 @@ def _add_profile(self, id, profileAcc):
 
     def show_profiles(self):
         """ Print the profile stats to stdout """
-        for i, (id, acc, showed) in self._profile_stats:
+        for i, (id, acc, showed) in enumerate(self._profile_stats):
             stats = acc.value
             if not showed and stats:
                 print "=" * 60
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index cd493a5f93b48..bf3cf94d5d083 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -615,16 +615,22 @@ def test_profiler(self):
 
         def heavy_foo(x):
             for i in range(1 << 20):
                 x = 1
-        rdd = self.sc.parallelize(range(100)).foreach(heavy_foo)
+        rdd = self.sc.parallelize(range(100))
+        rdd.foreach(heavy_foo)
         profiles = self.sc._profile_stats
         self.assertEqual(1, len(profiles))
-        id, acc, _ = profiles.pop()
+        id, acc, _ = profiles[0]
         stats = acc.value
         self.assertTrue(stats is not None)
         width, stat_list = stats.get_print_list([])
         func_names = [func_name for fname, n, func_name in stat_list]
         self.assertTrue("heavy_foo" in func_names)
+        self.sc.show_profiles()
+        d = tempfile.gettempdir()
+        self.sc.dump_profiles(d)
+        self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
+
 
 
 class TestSQL(PySparkTestCase):