From 7c7266208a3be984ac1ce53747dc0c3640f4ecac Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 18 Sep 2017 13:20:11 +0900 Subject: [PATCH] [SPARK-22043][PYTHON] Improves error message for show_profiles and dump_profiles ## What changes were proposed in this pull request? This PR proposes to improve error message from: ``` >>> sc.show_profiles() Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1000, in show_profiles self.profiler_collector.show_profiles() AttributeError: 'NoneType' object has no attribute 'show_profiles' >>> sc.dump_profiles("/tmp/abc") Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1005, in dump_profiles self.profiler_collector.dump_profiles(path) AttributeError: 'NoneType' object has no attribute 'dump_profiles' ``` to ``` >>> sc.show_profiles() Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1003, in show_profiles raise RuntimeError("'spark.python.profile' configuration must be set " RuntimeError: 'spark.python.profile' configuration must be set to 'true' to enable Python profile. >>> sc.dump_profiles("/tmp/abc") Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1012, in dump_profiles raise RuntimeError("'spark.python.profile' configuration must be set " RuntimeError: 'spark.python.profile' configuration must be set to 'true' to enable Python profile. ``` ## How was this patch tested? Unit tests added in `python/pyspark/tests.py` and manual tests. Author: hyukjinkwon Closes #19260 from HyukjinKwon/profile-errors. --- python/pyspark/context.py | 12 ++++++++++-- python/pyspark/tests.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index a7046043e0376..a33f6dcf31fc0 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -997,12 +997,20 @@ def runJob(self, rdd, partitionFunc, partitions=None, allowLocal=False): def show_profiles(self): """ Print the profile stats to stdout """ - self.profiler_collector.show_profiles() + if self.profiler_collector is not None: + self.profiler_collector.show_profiles() + else: + raise RuntimeError("'spark.python.profile' configuration must be set " + "to 'true' to enable Python profile.") def dump_profiles(self, path): """ Dump the profile stats into directory `path` """ - self.profiler_collector.dump_profiles(path) + if self.profiler_collector is not None: + self.profiler_collector.dump_profiles(path) + else: + raise RuntimeError("'spark.python.profile' configuration must be set " + "to 'true' to enable Python profile.") def getConf(self): conf = SparkConf() diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 3c108ec92ccc9..da99872da2f0e 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -1296,6 +1296,22 @@ def heavy_foo(x): rdd.foreach(heavy_foo) +class ProfilerTests2(unittest.TestCase): + def test_profiler_disabled(self): + sc = SparkContext(conf=SparkConf().set("spark.python.profile", "false")) + try: + self.assertRaisesRegexp( + RuntimeError, + "'spark.python.profile' configuration must be set", + lambda: sc.show_profiles()) + self.assertRaisesRegexp( + RuntimeError, + "'spark.python.profile' configuration must be set", + lambda: sc.dump_profiles("/tmp/abc")) + finally: + sc.stop() + + class InputFormatTests(ReusedPySparkTestCase): @classmethod