[SPARK-32161][PYTHON] Removing JVM logs from SparkUpgradeException
### What changes were proposed in this pull request?

Hide the JVM traceback for `SparkUpgradeException`. After this PR, the example below raises a clean, Python-only error:

```
from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
df2 = df.select('date_str', to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
df2.show(1, False)

 raise converted from None

pyspark.sql.utils.SparkUpgradeException: You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-dd-aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html

```
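
As the error message suggests, one remediation is restoring the pre-3.0 parser behavior. A minimal sketch, assuming an active `spark` session (e.g. the PySpark shell):

```python
# Option 1 from the error message: fall back to the legacy
# (SimpleDateFormat-based) datetime parser of Spark 2.x.
spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")
```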
### Why are the changes needed?

This change removes the JVM traceback from `SparkUpgradeException` in PySpark, making the stack trace more Pythonic.
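
The `raise converted from None` line in the output above is the core mechanism: re-raising with `from None` suppresses Python's implicit exception chaining, so the JVM-side traceback never reaches the user. A minimal sketch of the pattern (simplified and hypothetical, not Spark's exact code):

```python
class SparkUpgradeException(Exception):
    """Stand-in for pyspark.sql.utils.SparkUpgradeException."""


def jvm_call():
    # Pretend a py4j call failed with a long JVM-side stack trace.
    raise RuntimeError(
        "org.apache.spark.SparkUpgradeException: Fail to recognize 'yyyy-dd-aa' pattern")


try:
    jvm_call()
except RuntimeError as e:
    # `from None` drops the "During handling of the above exception..." chain,
    # so only the Pythonic traceback below is printed.
    raise SparkUpgradeException(str(e)) from None
```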

### Does this PR introduce any user-facing change?

Yes. Users will now see only the Python stack trace.

### How was this patch tested?

Unit tests.
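
For reference, such a test is typically run with Spark's Python test runner (invocation per the Spark developer docs; exact flags may vary by branch):

```
python/run-tests --testnames 'pyspark.sql.tests.test_utils'
```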

Closes #34275 from pralabhkumar/rk_spark_upgrade_exception.

Authored-by: pralabhkumar <pralabhkumar@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
pralabhkumar authored and HyukjinKwon committed Oct 14, 2021
1 parent 4a722dc commit d9d75f7
Showing 2 changed files with 18 additions and 1 deletion.
11 changes: 10 additions & 1 deletion python/pyspark/sql/tests/test_utils.py
```diff
@@ -17,8 +17,10 @@
 #
 
 from pyspark.sql.functions import sha2
-from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException, \
+    SparkUpgradeException
 from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
 
 
 class UtilsTests(ReusedSQLTestCase):
@@ -33,6 +35,13 @@ def test_capture_user_friendly_exception(self):
         except AnalysisException as e:
             self.assertRegex(str(e), "Column '`中文字段`' does not exist")
 
+    def test_spark_upgrade_exception(self):
+        # SPARK-32161: Test case to handle SparkUpgradeException in a Pythonic way
+        df = self.spark.createDataFrame([("2014-31-12",)], ['date_str'])
+        df2 = df.select('date_str',
+                        to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
+        self.assertRaises(SparkUpgradeException, df2.collect)
+
     def test_capture_parse_exception(self):
         self.assertRaises(ParseException, lambda: self.spark.sql("abc"))
```
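
With the test above in mind, the user-visible effect is that `SparkUpgradeException` can be caught like any ordinary Python exception. A usage sketch, assuming an active `spark` session as in the PySpark shell:

```python
from pyspark.sql.functions import to_date, from_unixtime, unix_timestamp
from pyspark.sql.utils import SparkUpgradeException

df = spark.createDataFrame([("2014-31-12",)], ["date_str"])
try:
    df.select(to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa")))).collect()
except SparkUpgradeException:
    # Recover by opting into the legacy parser, then retry the query.
    spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")
```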
8 changes: 8 additions & 0 deletions python/pyspark/sql/utils.py
```diff
@@ -112,6 +112,12 @@ class UnknownException(CapturedException):
     """
 
 
+class SparkUpgradeException(CapturedException):
+    """
+    Exception thrown because of Spark upgrade
+    """
+
+
 def convert_exception(e):
     assert e is not None
     assert SparkContext._jvm is not None
@@ -131,6 +137,8 @@ def convert_exception(e):
         return QueryExecutionException(origin=e)
     elif is_instance_of(gw, e, 'java.lang.IllegalArgumentException'):
         return IllegalArgumentException(origin=e)
+    elif is_instance_of(gw, e, 'org.apache.spark.SparkUpgradeException'):
+        return SparkUpgradeException(origin=e)
 
     c = e.getCause()
     stacktrace = jvm.org.apache.spark.util.Utils.exceptionString(e)
```
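
For context, `convert_exception` checks the JVM exception's class via py4j's `is_instance_of` and returns the matching Python-side wrapper; this PR adds `org.apache.spark.SparkUpgradeException` to that dispatch. A simplified, self-contained analogue (illustrative names, not PySpark's actual internals):

```python
class CapturedException(Exception):
    """Base for exceptions converted from the JVM side (stand-in)."""


class IllegalArgumentException(CapturedException):
    pass


class SparkUpgradeException(CapturedException):
    pass


# Maps a JVM exception class name to its Python wrapper; this PR's change is
# equivalent to adding the SparkUpgradeException entry here.
_JVM_TO_PYTHON = {
    "java.lang.IllegalArgumentException": IllegalArgumentException,
    "org.apache.spark.SparkUpgradeException": SparkUpgradeException,
}


def convert_exception(jvm_class_name, message):
    # Unrecognized JVM classes fall back to the generic wrapper.
    return _JVM_TO_PYTHON.get(jvm_class_name, CapturedException)(message)


print(type(convert_exception("org.apache.spark.SparkUpgradeException", "bad pattern")).__name__)
# -> SparkUpgradeException
```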
