From d9d75f7d613a84b9c131e29824f0d56d8d5e6bdc Mon Sep 17 00:00:00 2001
From: pralabhkumar
Date: Thu, 14 Oct 2021 16:48:53 +0900
Subject: [PATCH] [SPARK-32161][PYTHON] Removing JVM logs from
 SparkUpgradeException

### What changes were proposed in this pull request?
Hide the JVM traceback for SparkUpgradeException.

With this PR, the following code produces:
```
from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
df2 = df.select('date_str', to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
df2.show(1, False)

raise converted from None
pyspark.sql.utils.SparkUpgradeException: You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyy-dd-aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
```

### Why are the changes needed?
This change removes the JVM traceback from SparkUpgradeException in PySpark, so the stack trace is presented in a more Pythonic way.

### Does this PR introduce any user-facing change?
Yes. Users will now see only the Python stack trace.

### How was this patch tested?
Unit tests.

Closes #34275 from pralabhkumar/rk_spark_upgrade_exception.

Authored-by: pralabhkumar
Signed-off-by: Hyukjin Kwon
---
 python/pyspark/sql/tests/test_utils.py | 11 ++++++++++-
 python/pyspark/sql/utils.py            |  8 ++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py
index 10b579dbf103e..69f1688ab6e96 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -17,8 +17,10 @@
 #
 
 from pyspark.sql.functions import sha2
-from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException, \
+    SparkUpgradeException
 from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
 
 
 class UtilsTests(ReusedSQLTestCase):
@@ -33,6 +35,13 @@ def test_capture_user_friendly_exception(self):
         except AnalysisException as e:
             self.assertRegex(str(e), "Column '`中文字段`' does not exist")
 
+    def test_spark_upgrade_exception(self):
+        # SPARK-32161 : Test case to Handle SparkUpgradeException in pythonic way
+        df = self.spark.createDataFrame([("2014-31-12",)], ['date_str'])
+        df2 = df.select('date_str',
+                        to_date(from_unixtime(unix_timestamp('date_str', 'yyyy-dd-aa'))))
+        self.assertRaises(SparkUpgradeException, df2.collect)
+
     def test_capture_parse_exception(self):
         self.assertRaises(ParseException, lambda: self.spark.sql("abc"))
 
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 578cf71378203..61846aadf8c25 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -112,6 +112,12 @@ class UnknownException(CapturedException):
     """
 
 
+class SparkUpgradeException(CapturedException):
+    """
+    Exception thrown because of Spark upgrade
+    """
+
+
 def convert_exception(e):
     assert e is not None
     assert SparkContext._jvm is not None
@@ -131,6 +137,8 @@ def convert_exception(e):
         return QueryExecutionException(origin=e)
     elif is_instance_of(gw, e, 'java.lang.IllegalArgumentException'):
         return IllegalArgumentException(origin=e)
+    elif is_instance_of(gw, e, 'org.apache.spark.SparkUpgradeException'):
+        return SparkUpgradeException(origin=e)
     c = e.getCause()
     stacktrace = jvm.org.apache.spark.util.Utils.exceptionString(e)
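
For reference, a minimal standalone sketch of how the newly mapped exception surfaces in user code. Assumptions: Spark 3.x with this patch applied, the default spark.sql.legacy.timeParserPolicy (EXCEPTION), and a local SparkSession created here purely for illustration; it mirrors the test added above rather than prescribing a canonical usage.

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import to_date, from_unixtime, unix_timestamp
from pyspark.sql.utils import SparkUpgradeException

# Local session for illustration only; any existing SparkSession works the same way.
spark = SparkSession.builder.master("local[1]").appName("spark-32161-demo").getOrCreate()

# 'yyyy-dd-aa' is not a valid Spark 3.x datetime pattern, so evaluating it
# raises org.apache.spark.SparkUpgradeException on the JVM side.
df = spark.createDataFrame([("2014-31-12",)], ["date_str"])
df2 = df.select(
    "date_str",
    to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa"))),
)

try:
    df2.collect()
except SparkUpgradeException as e:
    # With the convert_exception mapping above, the error arrives as the
    # Python-level SparkUpgradeException; only its message is shown,
    # without the JVM traceback.
    print(str(e))

spark.stop()
```

Because convert_exception now recognizes org.apache.spark.SparkUpgradeException as a CapturedException subclass, the re-raise via `raise converted from None` drops the Py4J/JVM traceback that an UnknownException would otherwise carry.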