diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index c7199ac938bea..d0c0d1c115b0f 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -287,6 +287,11 @@
       "NumPy array input should be of <dimensions> dimensions."
     ]
   },
+  "INVALID_NUMBER_OF_DATAFRAMES_IN_GROUP" : {
+    "message" : [
+      "Invalid number of dataframes in group <dataframes_in_group>."
+    ]
+  },
   "INVALID_PANDAS_UDF" : {
     "message" : [
       "Invalid function: <detail>"
     ]
   },
@@ -803,9 +808,9 @@
       "Expected <expected> values for `<item>`, got <actual>."
     ]
   },
-  "TYPE_HINT_REQUIRED" : {
+  "TYPE_HINT_SHOULD_BE_SPECIFIED" : {
     "message" : [
-      "A <type_hint> is required <detail>."
+      "Type hints for <target> should be specified; however, got <sig>."
     ]
   },
   "UDF_RETURN_TYPE" : {
     "message" : [
@@ -888,6 +893,11 @@
       "Unknown response: <response>."
     ]
   },
+  "UNKNOWN_VALUE_FOR" : {
+    "message" : [
+      "Unknown value for `<var>`."
+    ]
+  },
   "UNSUPPORTED_DATA_TYPE" : {
     "message" : [
       "Unsupported DataType `<data_type>`."
     ]
   },
@@ -983,6 +993,11 @@
       "Value for `<arg_name>` only supports the 'pearson', got '<arg_value>'."
     ]
   },
+  "VALUE_NOT_PLAIN_COLUMN_REFERENCE" : {
+    "message" : [
+      "Value <val> in <field_name> should be a plain column reference such as `df.col` or `col('column')`."
+    ]
+  },
   "VALUE_NOT_POSITIVE" : {
     "message" : [
       "Value for `<arg_name>` must be positive, got '<arg_value>'."
diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py
index 8ffb7407714b2..6c5bd826a0234 100644
--- a/python/pyspark/sql/pandas/serializers.py
+++ b/python/pyspark/sql/pandas/serializers.py
@@ -707,8 +707,9 @@ def load_stream(self, stream):
                 yield batches1, batches2

             elif dataframes_in_group != 0:
-                raise ValueError(
-                    "Invalid number of dataframes in group {0}".format(dataframes_in_group)
+                raise PySparkValueError(
+                    error_class="INVALID_NUMBER_OF_DATAFRAMES_IN_GROUP",
+                    message_parameters={"dataframes_in_group": str(dataframes_in_group)},
                 )

diff --git a/python/pyspark/sql/pandas/typehints.py b/python/pyspark/sql/pandas/typehints.py
index f0c13e66a63d2..37ba02a94d583 100644
--- a/python/pyspark/sql/pandas/typehints.py
+++ b/python/pyspark/sql/pandas/typehints.py
@@ -18,7 +18,7 @@
 from typing import Any, Callable, Dict, Optional, Union, TYPE_CHECKING

 from pyspark.sql.pandas.utils import require_minimum_pandas_version
-from pyspark.errors import PySparkNotImplementedError
+from pyspark.errors import PySparkNotImplementedError, PySparkValueError

 if TYPE_CHECKING:
     from pyspark.sql.pandas._typing import (
@@ -51,12 +51,18 @@ def infer_eval_type(
         annotations[parameter] for parameter in sig.parameters if parameter in annotations
     ]
     if len(parameters_sig) != len(sig.parameters):
-        raise ValueError("Type hints for all parameters should be specified; however, got %s" % sig)
+        raise PySparkValueError(
+            error_class="TYPE_HINT_SHOULD_BE_SPECIFIED",
+            message_parameters={"target": "all parameters", "sig": str(sig)},
+        )

     # Check if the return has a type hint
     return_annotation = type_hints.get("return", sig.return_annotation)
     if sig.empty is return_annotation:
-        raise ValueError("Type hint for the return type should be specified; however, got %s" % sig)
+        raise PySparkValueError(
+            error_class="TYPE_HINT_SHOULD_BE_SPECIFIED",
+            message_parameters={"target": "the return type", "sig": str(sig)},
+        )

     # Series, Frame or Union[DataFrame, Series], ... -> Series or Frame
     is_series_or_frame = all(
diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py
index f4005a47357b6..36c982eb519c6 100644
--- a/python/pyspark/sql/pandas/types.py
+++ b/python/pyspark/sql/pandas/types.py
@@ -49,7 +49,7 @@
     UserDefinedType,
     _create_row,
 )
-from pyspark.errors import PySparkTypeError, UnsupportedOperationException
+from pyspark.errors import PySparkTypeError, UnsupportedOperationException, PySparkValueError

 if TYPE_CHECKING:
     import pandas as pd
@@ -716,7 +716,10 @@ def convert_struct_as_dict(value: Any) -> Any:
                 return convert_struct_as_dict

             else:
-                raise ValueError(f"Unknown value for `struct_in_pandas`: {_struct_in_pandas}")
+                raise PySparkValueError(
+                    error_class="UNKNOWN_VALUE_FOR",
+                    message_parameters={"var": str(_struct_in_pandas)},
+                )

         elif isinstance(dt, TimestampType):
             assert timezone is not None
diff --git a/python/pyspark/sql/sql_formatter.py b/python/pyspark/sql/sql_formatter.py
index 5e79b9ff5ea98..a27f7205a2d74 100644
--- a/python/pyspark/sql/sql_formatter.py
+++ b/python/pyspark/sql/sql_formatter.py
@@ -25,6 +25,7 @@
 if typing.TYPE_CHECKING:
     from pyspark.sql import SparkSession, DataFrame
 from pyspark.sql.functions import lit
+from pyspark.errors import PySparkValueError


 class SQLStringFormatter(string.Formatter):
@@ -61,9 +62,9 @@ def _convert_value(self, val: Any, field_name: str) -> Optional[str]:
             ):
                 return jexpr.sql()
             else:
-                raise ValueError(
-                    "%s in %s should be a plain column reference such as `df.col` "
-                    "or `col('column')`" % (val, field_name)
+                raise PySparkValueError(
+                    error_class="VALUE_NOT_PLAIN_COLUMN_REFERENCE",
+                    message_parameters={"val": str(val), "field_name": field_name},
                 )
         elif isinstance(val, DataFrame):
             for df, n in self._temp_views: