From d9d75f7d613a84b9c131e29824f0d56d8d5e6bdc Mon Sep 17 00:00:00 2001
From: pralabhkumar
Date: Thu, 14 Oct 2021 16:48:53 +0900
Subject: [PATCH] [SPARK-32161][PYTHON] Removing JVM logs from
 SparkUpgradeException

### What changes were proposed in this pull request?
Hide the JVM traceback for SparkUpgradeException.

With this PR, the following code produces:
```
from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
df2 = df.select('date_str', to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
df2.show(1, False)

raise converted from None
pyspark.sql.utils.SparkUpgradeException: You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-dd-aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
```

### Why are the changes needed?
This change removes the JVM traceback from SparkUpgradeException in PySpark, so the stack trace is presented in a more Pythonic way.

### Does this PR introduce any user-facing change?
Yes. Users will now see only the Python stack trace.

### How was this patch tested?
Unit tests.

Closes #34275 from pralabhkumar/rk_spark_upgrade_exception.

Authored-by: pralabhkumar
Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/tests/test_utils.py | 11 ++++++++++-
 python/pyspark/sql/utils.py            |  8 ++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index 10b579dbf103e..69f1688ab6e96 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -17,8 +17,10 @@
 #
 
 from pyspark.sql.functions import sha2
-from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException, \
+    SparkUpgradeException
 from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
 
 
 class UtilsTests(ReusedSQLTestCase):
@@ -33,6 +35,13 @@ def test_capture_user_friendly_exception(self):
         except AnalysisException as e:
             self.assertRegex(str(e), "Column '`中文字段`' does not exist")
 
+    def test_spark_upgrade_exception(self):
+        # SPARK-32161 : Test case to Handle SparkUpgradeException in pythonic way
+        df = self.spark.createDataFrame([("2014-31-12",)], ['date_str'])
+        df2 = df.select('date_str',
+                        to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
+        self.assertRaises(SparkUpgradeException, df2.collect)
+
     def test_capture_parse_exception(self):
         self.assertRaises(ParseException, lambda: self.spark.sql("abc"))
 
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 578cf71378203..61846aadf8c25 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -112,6 +112,12 @@ class UnknownException(CapturedException):
     """
 
 
+class SparkUpgradeException(CapturedException):
+    """
+    Exception thrown because of Spark upgrade
+    """
+
+
 def convert_exception(e):
     assert e is not None
     assert SparkContext._jvm is not None
@@ -131,6 +137,8 @@ def convert_exception(e):
         return QueryExecutionException(origin=e)
     elif is_instance_of(gw, e, 'java.lang.IllegalArgumentException'):
         return IllegalArgumentException(origin=e)
+    elif is_instance_of(gw, e, 'org.apache.spark.SparkUpgradeException'):
+        return SparkUpgradeException(origin=e)
     c = e.getCause()
     stacktrace = jvm.org.apache.spark.util.Utils.exceptionString(e)
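
For reference, a minimal standalone sketch of how the newly mapped exception surfaces in user code. Assumptions: Spark 3.x with this patch applied, the default spark.sql.legacy.timeParserPolicy (EXCEPTION), and a local SparkSession created here purely for illustration; it mirrors the test added above rather than prescribing a canonical usage.

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import to_date, from_unixtime, unix_timestamp
from pyspark.sql.utils import SparkUpgradeException

# Local session for illustration only; any existing SparkSession works the same way.
spark = SparkSession.builder.master("local[1]").appName("spark-32161-demo").getOrCreate()

# 'yyyy-dd-aa' is not a valid Spark 3.x datetime pattern, so evaluating it
# raises org.apache.spark.SparkUpgradeException on the JVM side.
df = spark.createDataFrame([("2014-31-12",)], ["date_str"])
df2 = df.select(
    "date_str",
    to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa"))),
)

try:
    df2.collect()
except SparkUpgradeException as e:
    # With the convert_exception mapping above, the error arrives as the
    # Python-level SparkUpgradeException; only its message is shown,
    # without the JVM traceback.
    print(str(e))

spark.stop()
```

Because convert_exception now recognizes org.apache.spark.SparkUpgradeException as a CapturedException subclass, the re-raise via `raise converted from None` drops the Py4J/JVM traceback that an UnknownException would otherwise carry.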