diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json index 74ac0ece6b2a7..808127772f72a 100644 --- a/python/pyspark/errors/error-conditions.json +++ b/python/pyspark/errors/error-conditions.json @@ -808,14 +808,14 @@ "Number of columns of the returned data doesn't match specified schema. Expected: Actual: " ] }, - "RESULT_ROWS_MISMATCH": { + "RESULT_COLUMN_TYPES_MISMATCH": { "message": [ - "The number of output rows () must match the number of input rows ()." + "Column types of the returned data do not match specified schema. Mismatch: ." ] }, - "RESULT_TYPE_MISMATCH_FOR_ARROW_UDF": { + "RESULT_ROWS_MISMATCH": { "message": [ - "Columns do not match in their data type: ." + "The number of output rows () must match the number of input rows ()." ] }, "REUSE_OBSERVATION": { @@ -823,11 +823,6 @@ "An Observation can be used with a DataFrame only once." ] }, - "SCHEMA_MISMATCH_FOR_ARROW_PYTHON_UDF": { - "message": [ - "Result vector from was not the required length: expected , got ." - ] - }, "SCHEMA_MISMATCH_FOR_PANDAS_UDF": { "message": [ "Result vector from was not the required length: expected , got ." diff --git a/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py b/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py index 3cb5f826fe825..43577f6905386 100644 --- a/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py @@ -147,7 +147,8 @@ def test_apply_in_arrow_returning_wrong_types(self): with self.quiet(): with self.assertRaisesRegex( PythonException, - f"Columns do not match in their data type: {expected}", + "Column types of the returned data do not match specified schema. " + f"Mismatch: {expected}", ): self.cogrouped.applyInArrow( lambda left, right: left, schema=schema @@ -171,7 +172,8 @@ def test_apply_in_arrow_returning_wrong_types_positional_assignment(self): with self.quiet(): with self.assertRaisesRegex( PythonException, - f"Columns do not match in their data type: {expected}", + "Column types of the returned data do not match specified schema. " + f"Mismatch: {expected}", ): self.cogrouped.applyInArrow( lambda left, right: left, schema=schema diff --git a/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py b/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py index 20df9332aa6dd..9cb22558cd032 100644 --- a/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py @@ -171,7 +171,8 @@ def test_apply_in_arrow_returning_wrong_types(self): for func_variation in function_variations(lambda table: table): with self.assertRaisesRegex( PythonException, - f"Columns do not match in their data type: {expected}", + "Column types of the returned data do not match specified schema. " + f"Mismatch: {expected}", ): df.groupby("id").applyInArrow(func_variation, schema=schema).collect() @@ -196,7 +197,8 @@ def test_apply_in_arrow_returning_wrong_types_positional_assignment(self): for func_variation in function_variations(lambda table: table): with self.assertRaisesRegex( PythonException, - f"Columns do not match in their data type: {expected}", + "Column types of the returned data do not match specified schema. " + f"Mismatch: {expected}", ): df.groupby("id").applyInArrow( func_variation, schema=schema diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 4bb81ae044ea6..95a7ccdc4f8dc 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -612,7 +612,7 @@ def verify_arrow_result(result, assign_cols_by_name, expected_cols_and_types): if type_mismatch: raise PySparkRuntimeError( - errorClass="RESULT_TYPE_MISMATCH_FOR_ARROW_UDF", + errorClass="RESULT_COLUMN_TYPES_MISMATCH", messageParameters={ "mismatch": ", ".join( "column '{}' (expected {}, actual {})".format(name, expected, actual)