apache · itholic · Feb 15, 2022 · Feb 23, 2022 · Feb 24, 2022 · Feb 24, 2022
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
@@ -18,6 +18,9 @@
   "CANNOT_UP_CAST_DATATYPE" : {
     "message" : [ "Cannot up cast %s from %s to %s.\n%s" ]
   },
+  "CANNOT_USE_MIXTURE" : {
+    "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ]
+  },
   "CAST_CAUSES_OVERFLOW" : {
     "message" : [ "Casting %s to %s causes overflow. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ],
     "sqlState" : "22005"

diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
@@ -257,7 +257,8 @@ def test_udf_not_supported_in_join_condition(self):
 
         def runWithJoinType(join_type, type_string):
             with self.assertRaisesRegex(
-                AnalysisException, "Using PythonUDF.*%s is not supported." % type_string
+                AnalysisException,
+                "Using PythonUDF in join condition of join type %s is not supported" % type_string,
             ):
                 left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect()
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -198,7 +198,9 @@ object QueryCompilationErrors {
   }
 
   def pandasUDFAggregateNotSupportedInPivotError(): Throwable = {
-    new AnalysisException("Pandas UDF aggregate expressions are currently not supported in pivot.")
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_FEATURE",
+      messageParameters = Array("Pandas UDF aggregate expressions don't support pivot."))
   }
 
   def aggregateExpressionRequiredForPivotError(sql: String): Throwable = {
@@ -1330,18 +1332,15 @@ object QueryCompilationErrors {
       s"Expected: ${dataType.typeName}; Found: ${expression.dataType.typeName}")
   }
 
-  def groupAggPandasUDFUnsupportedByStreamingAggError(): Throwable = {
-    new AnalysisException("Streaming aggregation doesn't support group aggregate pandas UDF")
-  }
-
   def streamJoinStreamWithoutEqualityPredicateUnsupportedError(plan: LogicalPlan): Throwable = {
     new AnalysisException(
       "Stream-stream join without equality predicate is not supported", plan = Some(plan))
   }
 
   def cannotUseMixtureOfAggFunctionAndGroupAggPandasUDFError(): Throwable = {
     new AnalysisException(
-      "Cannot use a mixture of aggregate function and group aggregate pandas UDF")
+      errorClass = "CANNOT_USE_MIXTURE",
+      messageParameters = Array.empty)
   }
 
   def ambiguousAttributesInSelfJoinError(
@@ -1570,8 +1569,10 @@ object QueryCompilationErrors {
   }
 
   def usePythonUDFInJoinConditionUnsupportedError(joinType: JoinType): Throwable = {
-    new AnalysisException("Using PythonUDF in join condition of join type" +
-      s" $joinType is not supported.")
+    new AnalysisException(
+      errorClass = "UNSUPPORTED_FEATURE",
+      messageParameters = Array(
+        s"Using PythonUDF in join condition of join type $joinType is not supported"))
   }
 
   def conflictingAttributesInJoinConditionError(

diff --git a/...cala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala b/...cala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala
@@ -188,7 +188,8 @@ class ExtractPythonUDFFromJoinConditionSuite extends PlanTest {
         Optimize.execute(query.analyze)
       }
       assert(e.message.contentEquals(
-        s"Using PythonUDF in join condition of join type $joinType is not supported."))
+        "The feature is not supported: " +
+        s"Using PythonUDF in join condition of join type $joinType is not supported"))
 
       val query2 = testRelationLeft.join(
         testRelationRight,

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution
 import java.util.Locale
 
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{execution, Strategy}
+import org.apache.spark.sql.{execution, AnalysisException, Strategy}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.expressions._
@@ -373,7 +373,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         namedGroupingExpressions, aggregateExpressions, rewrittenResultExpressions, child) =>
 
         if (aggregateExpressions.exists(PythonUDF.isGroupedAggPandasUDF)) {
-          throw QueryCompilationErrors.groupAggPandasUDFUnsupportedByStreamingAggError()
+          throw new AnalysisException(
+            "Streaming aggregation doesn't support group aggregate pandas UDF")
         }
 
         val sessionWindowOption = namedGroupingExpressions.find { p =>

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.sql.errors
 
-import org.apache.spark.sql.{AnalysisException, QueryTest}
-import org.apache.spark.sql.functions.{grouping, grouping_id}
+import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest}
+import org.apache.spark.sql.functions.{grouping, grouping_id, sum}
 import org.apache.spark.sql.test.SharedSparkSession
 
 case class StringLongClass(a: String, b: Long)
@@ -101,4 +101,71 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession {
     assert(e.message ===
       "The argument_index of string format cannot contain position 0$.")
   }
+
+  test("CANNOT_USE_MIXTURE: Using aggregate function with grouped aggregate pandas UDF") {
+    import IntegratedUDFTestUtils._
+    assume(shouldTestGroupedAggPandasUDFs) // cancel instead of fail if pandas UDFs can't run
+    val df = Seq(
+      (536361, "85123A", 2, 17850),
+      (536362, "85123B", 4, 17850),
+      (536363, "86123A", 6, 17851)
+    ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID")
+    val e = intercept[AnalysisException] {
+      val pandasTestUDF = TestGroupedAggPandasUDF(name = "pandas_udf")
+      df.groupBy("CustomerID")
+        .agg(pandasTestUDF(df("Quantity")), sum(df("Quantity"))).collect()
+    }
+    // Mixing sum() with a grouped aggregate pandas UDF must surface the dedicated error class.
+    assert(e.errorClass === Some("CANNOT_USE_MIXTURE"))
+    assert(e.message ===
+      "Cannot use a mixture of aggregate function and group aggregate pandas UDF")
+  }
+
+  test("UNSUPPORTED_FEATURE: Using Python UDF with unsupported join condition") {
+    import IntegratedUDFTestUtils._
+    assume(shouldTestPythonUDFs) // cancel instead of fail if Python UDFs can't run
+    val df1 = Seq(
+      (536361, "85123A", 2, 17850),
+      (536362, "85123B", 4, 17850),
+      (536363, "86123A", 6, 17851)
+    ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID")
+    val df2 = Seq(
+      ("Bob", 17850),
+      ("Alice", 17850),
+      ("Tom", 17851)
+    ).toDF("CustomerName", "CustomerID")
+    // A Python UDF wrapping the join predicate of a left outer join is expected to be rejected.
+    val e = intercept[AnalysisException] {
+      val pythonTestUDF = TestPythonUDF(name = "python_udf")
+      df1.join(
+        df2, pythonTestUDF(df1("CustomerID") === df2("CustomerID")), "leftouter").collect()
+    }
+    // Check the error class, its SQLSTATE and the fully rendered message.
+    assert(e.errorClass === Some("UNSUPPORTED_FEATURE"))
+    assert(e.getSqlState === "0A000")
+    assert(e.message ===
+      "The feature is not supported: " +
+      "Using PythonUDF in join condition of join type LeftOuter is not supported")
+  }
+
+  test("UNSUPPORTED_FEATURE: Using pandas UDF aggregate expression with pivot") {
+    import IntegratedUDFTestUtils._
+    assume(shouldTestGroupedAggPandasUDFs) // cancel instead of fail if pandas UDFs can't run
+    val df = Seq(
+      (536361, "85123A", 2, 17850),
+      (536362, "85123B", 4, 17850),
+      (536363, "86123A", 6, 17851)
+    ).toDF("InvoiceNo", "StockCode", "Quantity", "CustomerID")
+    // No action is triggered: the failure is expected while the pivot aggregation is built.
+    val e = intercept[AnalysisException] {
+      val pandasTestUDF = TestGroupedAggPandasUDF(name = "pandas_udf")
+      df.groupBy(df("CustomerID")).pivot(df("CustomerID")).agg(pandasTestUDF(df("Quantity")))
+    }
+    // Check the error class, its SQLSTATE and the fully rendered message.
+    assert(e.errorClass === Some("UNSUPPORTED_FEATURE"))
+    assert(e.getSqlState === "0A000")
+    assert(e.message ===
+      "The feature is not supported: " +
+      "Pandas UDF aggregate expressions don't support pivot.")
+  }
 }