diff --git a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java index e1235b2982ba0..fd39cda76db67 100644 --- a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java +++ b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java @@ -27,7 +27,7 @@ * * - For backwards compatibility, existing Throwable types can be thrown with an arbitrary error * message with a null error class. See [[SparkException]]. - * - To promote standardization, Throwables should be thrown with an error class and message + * - To promote standardization, Throwables should be thrown with an error condition and message * parameters to construct an error message with SparkThrowableHelper.getMessage(). New Throwable * types should not accept arbitrary error messages. See [[SparkArithmeticException]]. * @@ -35,8 +35,8 @@ */ @Evolving public interface SparkThrowable { - // Succinct, human-readable, unique, and consistent representation of the error category - // If null, error class is not set + // Succinct, human-readable, unique, and consistent representation of the error condition. + // If null, error condition is not set. String getErrorClass(); // Portable error identifier across SQL engines diff --git a/common/utils/src/main/resources/error/README.md b/common/utils/src/main/resources/error/README.md index adb631ccdca7f..80e749bc71087 100644 --- a/common/utils/src/main/resources/error/README.md +++ b/common/utils/src/main/resources/error/README.md @@ -39,7 +39,7 @@ The terms error class, state, and condition come from the SQL standard. Unfortunately, we have historically used the term "error class" inconsistently to refer both to a proper error class like `42` and also to an error condition like `DATATYPE_MISSING_SIZE`. -Fixing this will require renaming `SparkException.errorClass` to `SparkException.errorCondition` and making similar changes to `ErrorClassesJsonReader` and other parts of the codebase. We will address this in [SPARK-47429]. Until that is complete, we will have to live with the fact that a string like `DATATYPE_MISSING_SIZE` is called an "error condition" in our user-facing documentation but an "error class" in the code. +Fixing this will require renaming `SparkException.errorClass` to `SparkException.errorCondition` and making similar changes to other parts of the codebase. We will address this in [SPARK-47429]. Until that is complete, we will have to live with the fact that a string like `DATATYPE_MISSING_SIZE` is called an "error condition" in our user-facing documentation but an "error class" in the code. For more details, please see [SPARK-46810]. diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 75067a1920f7f..488b7cc8a637c 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -80,7 +80,7 @@ "message" : [ "Invalid as-of join." ], - "subClass" : { + "subCondition" : { "TOLERANCE_IS_NON_NEGATIVE" : { "message" : [ "The input argument `tolerance` must be non-negative." @@ -165,7 +165,7 @@ "message" : [ "Failed to create data source table :" ], - "subClass" : { + "subCondition" : { "EXTERNAL_METADATA_UNSUPPORTED" : { "message" : [ "provider '' does not support external metadata but a schema is provided. Please remove the schema when creating the table." @@ -202,7 +202,7 @@ "message" : [ "An error occurred during loading state." 
], - "subClass" : { + "subCondition" : { "CANNOT_READ_CHECKPOINT" : { "message" : [ "Cannot read RocksDB checkpoint metadata. Expected , but found ." @@ -350,7 +350,7 @@ "message" : [ "Cannot update field type:" ], - "subClass" : { + "subCondition" : { "ARRAY_TYPE" : { "message" : [ "Update the element by updating .element." @@ -390,7 +390,7 @@ "message" : [ "Error writing state store files for provider ." ], - "subClass" : { + "subCondition" : { "CANNOT_COMMIT" : { "message" : [ "Cannot perform commit during state checkpoint." @@ -447,7 +447,7 @@ "message" : [ "The codec is not available." ], - "subClass" : { + "subCondition" : { "WITH_AVAILABLE_CODECS_SUGGESTION" : { "message" : [ "Available codecs are ." @@ -477,7 +477,7 @@ "message" : [ "Could not determine which collation to use for string functions and operators." ], - "subClass" : { + "subCondition" : { "EXPLICIT" : { "message" : [ "Error occurred due to the mismatch between explicit collations: . Decide on a single explicit collation and remove others." @@ -495,7 +495,7 @@ "message" : [ "Can't create array with elements which exceeding the array size limit ," ], - "subClass" : { + "subCondition" : { "FUNCTION" : { "message" : [ "unsuccessful try to create arrays in the function ." @@ -550,7 +550,7 @@ "message" : [ "Cannot process input data types for the expression: ." ], - "subClass" : { + "subCondition" : { "MISMATCHED_TYPES" : { "message" : [ "All input types must be the same except nullable, containsNull, valueContainsNull flags, but found the input types ." @@ -581,7 +581,7 @@ "message" : [ "Generic Spark Connect error." ], - "subClass" : { + "subCondition" : { "INTERCEPTOR_CTOR_MISSING" : { "message" : [ "Cannot instantiate GRPC interceptor because is missing a default constructor without arguments." @@ -632,7 +632,7 @@ "message" : [ "Cannot create view , the reason is" ], - "subClass" : { + "subCondition" : { "NOT_ENOUGH_DATA_COLUMNS" : { "message" : [ "not enough data columns:", @@ -654,7 +654,7 @@ "message" : [ "Cannot resolve due to data type mismatch:" ], - "subClass" : { + "subCondition" : { "ARRAY_FUNCTION_DIFF_TYPES" : { "message" : [ "Input to should have been followed by a value with same element type, but it's [, ]." @@ -1049,7 +1049,7 @@ "message" : [ "Call to routine is invalid because it includes multiple argument assignments to the same parameter name ." ], - "subClass" : { + "subCondition" : { "BOTH_POSITIONAL_AND_NAMED" : { "message" : [ "A positional argument and named argument both referred to the same parameter. Please remove the named argument referring to this parameter." @@ -1131,7 +1131,7 @@ "message" : [ "'' expects a table but is a view." ], - "subClass" : { + "subCondition" : { "NO_ALTERNATIVE" : { "message" : [ "" @@ -1149,7 +1149,7 @@ "message" : [ "The table does not support ." ], - "subClass" : { + "subCondition" : { "NO_ALTERNATIVE" : { "message" : [ "" @@ -1197,7 +1197,7 @@ "message" : [ "Failed JDBC on the operation:" ], - "subClass" : { + "subCondition" : { "ALTER_TABLE" : { "message" : [ "Alter the table ." @@ -1281,7 +1281,7 @@ "message" : [ "Encountered error while reading file ." ], - "subClass" : { + "subCondition" : { "CANNOT_READ_FILE_FOOTER" : { "message" : [ "Could not read footer. Please ensure that the file is in either ORC or Parquet format.", @@ -1469,7 +1469,7 @@ "message" : [ "Illegal value provided to the State Store" ], - "subClass" : { + "subCondition" : { "EMPTY_LIST_VALUE" : { "message" : [ "Cannot write empty list values to State Store for StateName ." 
@@ -1505,7 +1505,7 @@ "message" : [ "Cannot write incompatible data for the table :" ], - "subClass" : { + "subCondition" : { "AMBIGUOUS_COLUMN_NAME" : { "message" : [ "Ambiguous column name in the input data ." @@ -1576,7 +1576,7 @@ "message" : [ "Incomplete complex type:" ], - "subClass" : { + "subCondition" : { "ARRAY" : { "message" : [ "The definition of \"ARRAY\" type is incomplete. You must provide an element type. For example: \"ARRAY\"." @@ -1599,7 +1599,7 @@ "message" : [ "You may get a different result due to the upgrading to" ], - "subClass" : { + "subCondition" : { "DATETIME_PATTERN_RECOGNITION" : { "message" : [ "Spark >= 3.0:", @@ -1671,7 +1671,7 @@ "message" : [ "Cannot write to , the reason is" ], - "subClass" : { + "subCondition" : { "NOT_ENOUGH_DATA_COLUMNS" : { "message" : [ "not enough data columns:", @@ -1702,7 +1702,7 @@ "message" : [ "Can't find table property:" ], - "subClass" : { + "subCondition" : { "MISSING_KEY" : { "message" : [ "." @@ -1744,7 +1744,7 @@ "message" : [ "An object in the metadata catalog has been corrupted:" ], - "subClass" : { + "subCondition" : { "SQL_CONFIG" : { "message" : [ "Corrupted view SQL configs in catalog." @@ -1818,7 +1818,7 @@ "message" : [ "The FILTER expression in an aggregate function is invalid." ], - "subClass" : { + "subCondition" : { "CONTAINS_AGGREGATE" : { "message" : [ "Expected a FILTER expression without an aggregation, but found ." @@ -1864,7 +1864,7 @@ "message" : [ "The boundary is invalid: ." ], - "subClass" : { + "subCondition" : { "END" : { "message" : [ "Expected the value is '0', '', '[, ]'." @@ -1912,7 +1912,7 @@ "message" : [ "The value '' in the config \"\" is invalid." ], - "subClass" : { + "subCondition" : { "DEFAULT_COLLATION" : { "message" : [ "Cannot resolve the given default collation. Did you mean ''?" @@ -1930,7 +1930,7 @@ "message" : [ "The cursor is invalid." ], - "subClass" : { + "subCondition" : { "DISCONNECTED" : { "message" : [ "The cursor has been disconnected by the server." @@ -1958,7 +1958,7 @@ "message" : [ "Unrecognized datetime pattern: ." ], - "subClass" : { + "subCondition" : { "ILLEGAL_CHARACTER" : { "message" : [ "Illegal pattern character found in datetime pattern: . Please provide legal character." @@ -1976,7 +1976,7 @@ "message" : [ "Failed to execute command because the destination column or variable has a DEFAULT value ," ], - "subClass" : { + "subCondition" : { "DATA_TYPE" : { "message" : [ "which requires type, but the statement provided a value of incompatible type." @@ -2004,7 +2004,7 @@ "message" : [ "Invalid value for delimiter." ], - "subClass" : { + "subCondition" : { "DELIMITER_LONGER_THAN_EXPECTED" : { "message" : [ "Delimiter cannot be more than one character: ." @@ -2094,7 +2094,7 @@ "message" : [ "The format is invalid: ." ], - "subClass" : { + "subCondition" : { "CONT_THOUSANDS_SEPS" : { "message" : [ "Thousands separators (, or G) must have digits in between them in the number format." @@ -2163,7 +2163,7 @@ "message" : [ "The handle is invalid." ], - "subClass" : { + "subCondition" : { "FORMAT" : { "message" : [ "Handle must be an UUID string of the format '00112233-4455-6677-8899-aabbccddeeff'" @@ -2221,7 +2221,7 @@ "message" : [ "Invalid inline table." ], - "subClass" : { + "subCondition" : { "CANNOT_EVALUATE_EXPRESSION_IN_INLINE_TABLE" : { "message" : [ "Cannot evaluate the expression in inline table definition." @@ -2249,7 +2249,7 @@ "message" : [ "Error parsing '' to interval. Please ensure that the value provided is in a valid format for defining an interval. 
You can reference the documentation for the correct format." ], - "subClass" : { + "subCondition" : { "ARITHMETIC_EXCEPTION" : { "message" : [ "Uncaught arithmetic exception while parsing ''." @@ -2317,7 +2317,7 @@ "message" : [ "Invalid inverse distribution function ." ], - "subClass" : { + "subCondition" : { "DISTINCT_UNSUPPORTED" : { "message" : [ "Cannot use DISTINCT with WITHIN GROUP." @@ -2364,7 +2364,7 @@ "message" : [ "Invalid lambda function call." ], - "subClass" : { + "subCondition" : { "DUPLICATE_ARG_NAMES" : { "message" : [ "The lambda function has duplicate arguments . Please, consider to rename the argument names or set to \"true\"." @@ -2393,7 +2393,7 @@ "message" : [ "The limit like expression is invalid." ], - "subClass" : { + "subCondition" : { "DATA_TYPE" : { "message" : [ "The expression must be integer type, but got ." @@ -2433,7 +2433,7 @@ "message" : [ "Invalid observed metrics." ], - "subClass" : { + "subCondition" : { "AGGREGATE_EXPRESSION_WITH_DISTINCT_UNSUPPORTED" : { "message" : [ "Aggregate expressions with DISTINCT are not allowed in observed metrics, but found: ." @@ -2476,7 +2476,7 @@ "message" : [ "Invalid options:" ], - "subClass" : { + "subCondition" : { "NON_MAP_FUNCTION" : { "message" : [ "Must use the `map()` function for options." @@ -2500,7 +2500,7 @@ "message" : [ "The value of parameter(s) in is invalid:" ], - "subClass" : { + "subCondition" : { "AES_CRYPTO_ERROR" : { "message" : [ "detail message: " @@ -2579,7 +2579,7 @@ "message" : [ "The partition command is invalid." ], - "subClass" : { + "subCondition" : { "PARTITION_MANAGEMENT_IS_UNSUPPORTED" : { "message" : [ "Table does not support partition management." @@ -2621,7 +2621,7 @@ "message" : [ "The input schema is not a valid schema string." ], - "subClass" : { + "subCondition" : { "NON_STRING_LITERAL" : { "message" : [ "The input expression must be string literal and not null." @@ -2669,7 +2669,7 @@ "message" : [ "Invalid SQL syntax:" ], - "subClass" : { + "subCondition" : { "ANALYZE_TABLE_UNEXPECTED_NOSCAN" : { "message" : [ "ANALYZE TABLE(S) ... COMPUTE STATISTICS ... must be either NOSCAN or empty." @@ -2790,7 +2790,7 @@ "message" : [ "Invalid subquery:" ], - "subClass" : { + "subCondition" : { "SCALAR_SUBQUERY_RETURN_MORE_THAN_ONE_OUTPUT_COLUMN" : { "message" : [ "Scalar subquery must return only one column, but got ." @@ -2815,7 +2815,7 @@ "message" : [ "The time travel timestamp expression is invalid." ], - "subClass" : { + "subCondition" : { "INPUT" : { "message" : [ "Cannot be casted to the \"TIMESTAMP\" type." @@ -2911,7 +2911,7 @@ "message" : [ "The requested write distribution is invalid." ], - "subClass" : { + "subCondition" : { "PARTITION_NUM_AND_SIZE" : { "message" : [ "The partition number and advisory partition size can't be specified at the same time." @@ -2977,7 +2977,7 @@ "Malformed records are detected in record parsing: .", "Parse Mode: . To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'." ], - "subClass" : { + "subCondition" : { "CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS" : { "message" : [ "Parsing JSON arrays as structs is forbidden." @@ -3020,7 +3020,7 @@ "message" : [ "Resolved attribute(s) missing from in operator ." 
], - "subClass" : { + "subCondition" : { "RESOLVED_ATTRIBUTE_APPEAR_IN_OPERATION" : { "message" : [ "Attribute(s) with the same name appear in the operation: .", @@ -3144,7 +3144,7 @@ "message" : [ "Not allowed in the FROM clause:" ], - "subClass" : { + "subCondition" : { "LATERAL_WITH_PIVOT" : { "message" : [ "LATERAL together with PIVOT." @@ -3167,7 +3167,7 @@ "message" : [ "The expression used for the routine or clause must be a constant STRING which is NOT NULL." ], - "subClass" : { + "subCondition" : { "NOT_CONSTANT" : { "message" : [ "To be considered constant the expression must not depend on any columns, contain a subquery, or invoke a non deterministic function such as rand()." @@ -3196,7 +3196,7 @@ "message" : [ "Assigning a NULL is not allowed here." ], - "subClass" : { + "subCondition" : { "ARRAY_ELEMENT" : { "message" : [ "The array is defined to contain only elements that are NOT NULL." @@ -3232,7 +3232,7 @@ "message" : [ "Not supported command in JDBC catalog:" ], - "subClass" : { + "subCondition" : { "COMMAND" : { "message" : [ "" @@ -3316,7 +3316,7 @@ "message" : [ "" ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "The rounded half up from cannot be represented as Decimal(, )." @@ -3761,7 +3761,7 @@ "message" : [ "Invalid value for source option '':" ], - "subClass" : { + "subCondition" : { "IS_EMPTY" : { "message" : [ "cannot be empty." @@ -3841,7 +3841,7 @@ "message" : [ "Support of the clause or keyword: has been discontinued in this context." ], - "subClass" : { + "subCondition" : { "BANG_EQUALS_NOT" : { "message" : [ "The '!' keyword is only supported as an alias for the prefix operator 'NOT'.", @@ -4047,7 +4047,7 @@ "message" : [ "A column, variable, or function parameter with name cannot be resolved." ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "" @@ -4065,7 +4065,7 @@ "message" : [ "A field with name cannot be resolved with the struct-type column ." ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "" @@ -4083,7 +4083,7 @@ "message" : [ "Cannot resolve column as a map key. If the key is a string literal, add the single quotes '' around it." ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "" @@ -4125,7 +4125,7 @@ "message" : [ "Don't support add file." ], - "subClass" : { + "subCondition" : { "DIRECTORY" : { "message" : [ "The file is a directory, consider to set \"spark.sql.legacy.addSingleFileInAddFile\" to \"false\"." @@ -4149,7 +4149,7 @@ "message" : [ "Cannot call the method \"\" of the class \"\"." ], - "subClass" : { + "subCondition" : { "FIELD_INDEX" : { "message" : [ "The row shall have a schema to get an index of the field ." @@ -4174,7 +4174,7 @@ "message" : [ "Collation is not supported for:" ], - "subClass" : { + "subCondition" : { "FOR_FUNCTION" : { "message" : [ "function . Please try to use a different collation." @@ -4217,7 +4217,7 @@ "message" : [ "DEFAULT column values is not supported." ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "" @@ -4235,7 +4235,7 @@ "message" : [ "The deserializer is not supported:" ], - "subClass" : { + "subCondition" : { "DATA_TYPE_MISMATCH" : { "message" : [ "need a(n) field but got ." @@ -4281,7 +4281,7 @@ "message" : [ "The feature is not supported:" ], - "subClass" : { + "subCondition" : { "AES_MODE" : { "message" : [ "AES- with the padding by the function." 
@@ -4539,7 +4539,7 @@ "message" : [ "The generator is not supported:" ], - "subClass" : { + "subCondition" : { "MULTI_GENERATOR" : { "message" : [ "only one generator allowed per SELECT clause but found : ." @@ -4573,7 +4573,7 @@ "message" : [ "Can't insert into the target." ], - "subClass" : { + "subCondition" : { "MULTI_PATH" : { "message" : [ "Can only write data to relations with a single path but given paths are ." @@ -4606,7 +4606,7 @@ "message" : [ "MERGE operation contains unsupported condition." ], - "subClass" : { + "subCondition" : { "AGGREGATE" : { "message" : [ "Aggregates are not allowed: ." @@ -4629,7 +4629,7 @@ "message" : [ "Can't overwrite the target that is also being read from." ], - "subClass" : { + "subCondition" : { "PATH" : { "message" : [ "The target path is ." @@ -4647,7 +4647,7 @@ "message" : [ "The save mode is not supported for:" ], - "subClass" : { + "subCondition" : { "EXISTENT_PATH" : { "message" : [ "an existent path." @@ -4665,7 +4665,7 @@ "message" : [ "Unsupported subquery expression:" ], - "subClass" : { + "subCondition" : { "ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED" : { "message" : [ "Accessing outer query column is not allowed in this location:", @@ -4864,7 +4864,7 @@ "message" : [ "The requires parameters but the actual number is ." ], - "subClass" : { + "subCondition" : { "WITHOUT_SUGGESTION" : { "message" : [ "Please, refer to '/sql-ref-functions.html' for a fix." diff --git a/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala b/common/utils/src/main/scala/org/apache/spark/ErrorConditionsJSONReader.scala similarity index 81% rename from common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala rename to common/utils/src/main/scala/org/apache/spark/ErrorConditionsJSONReader.scala index 99b993f1127af..81b8a035063a8 100644 --- a/common/utils/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala +++ b/common/utils/src/main/scala/org/apache/spark/ErrorConditionsJSONReader.scala @@ -32,16 +32,16 @@ import org.apache.spark.annotation.DeveloperApi /** * A reader to load error information from one or more JSON files. Note that, if one error appears - * in more than one JSON files, the latter wins. + * in more than one JSON file, the latter wins. * Please read common/utils/src/main/resources/error/README.md for more details. 
*/ @DeveloperApi -class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { +class ErrorConditionsJsonReader(jsonFileURLs: Seq[URL]) { assert(jsonFileURLs.nonEmpty) // Exposed for testing private[spark] val errorInfoMap = - jsonFileURLs.map(ErrorClassesJsonReader.readAsMap).reduce(_ ++ _) + jsonFileURLs.map(ErrorConditionsJsonReader.readAsMap).reduce(_ ++ _) def getErrorMessage(errorClass: String, messageParameters: Map[String, String]): String = { val messageTemplate = getMessageTemplate(errorClass) @@ -49,7 +49,7 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { sub.setEnableUndefinedVariableException(true) sub.setDisableSubstitutionInValues(true) try { - sub.replace(ErrorClassesJsonReader.TEMPLATE_REGEX.replaceAllIn( + sub.replace(ErrorConditionsJsonReader.TEMPLATE_REGEX.replaceAllIn( messageTemplate, "\\$\\{$1\\}")) } catch { case i: IllegalArgumentException => throw SparkException.internalError( @@ -61,7 +61,7 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { def getMessageParameters(errorClass: String): Seq[String] = { val messageTemplate = getMessageTemplate(errorClass) - val matches = ErrorClassesJsonReader.TEMPLATE_REGEX.findAllIn(messageTemplate).toSeq + val matches = ErrorConditionsJsonReader.TEMPLATE_REGEX.findAllIn(messageTemplate).toSeq matches.map(m => m.stripSuffix(">").stripPrefix("<")) } @@ -74,12 +74,12 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { val errorInfo = errorInfoMap.getOrElse( mainErrorClass, throw SparkException.internalError(s"Cannot find main error class '$errorClass'")) - assert(errorInfo.subClass.isDefined == subErrorClass.isDefined) + assert(errorInfo.subCondition.isDefined == subErrorClass.isDefined) if (subErrorClass.isEmpty) { errorInfo.messageTemplate } else { - val errorSubInfo = errorInfo.subClass.get.getOrElse( + val errorSubInfo = errorInfo.subCondition.get.getOrElse( subErrorClass.get, throw SparkException.internalError(s"Cannot find sub error class '$errorClass'")) errorInfo.messageTemplate + " " + errorSubInfo.messageTemplate @@ -98,15 +98,15 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { val errorClasses = errorClass.split("\\.") errorClasses match { case Array(mainClass) => errorInfoMap.contains(mainClass) - case Array(mainClass, subClass) => errorInfoMap.get(mainClass).exists { info => - info.subClass.get.contains(subClass) + case Array(mainClass, subCondition) => errorInfoMap.get(mainClass).exists { info => + info.subCondition.get.contains(subCondition) } case _ => false } } } -private object ErrorClassesJsonReader { +private object ErrorConditionsJsonReader { private val TEMPLATE_REGEX = "<([a-zA-Z0-9_-]+)>".r private val mapper: JsonMapper = JsonMapper.builder() @@ -114,31 +114,31 @@ private object ErrorClassesJsonReader { .build() private def readAsMap(url: URL): Map[String, ErrorInfo] = { val map = mapper.readValue(url, new TypeReference[Map[String, ErrorInfo]]() {}) - val errorClassWithDots = map.collectFirst { - case (errorClass, _) if errorClass.contains('.') => errorClass + val errorConditionWithDots = map.collectFirst { + case (errorCondition, _) if errorCondition.contains('.') => errorCondition case (_, ErrorInfo(_, Some(map), _)) if map.keys.exists(_.contains('.')) => map.keys.collectFirst { case s if s.contains('.') => s }.get } - if (errorClassWithDots.isEmpty) { + if (errorConditionWithDots.isEmpty) { map } else { throw SparkException.internalError( - s"Found the (sub-)error class with dots: ${errorClassWithDots.get}") + s"Found an error (sub-)condition with dots: 
${errorConditionWithDots.get}") } } } /** - * Information associated with an error class. + * Information associated with an error condition. * - * @param sqlState SQLSTATE associated with this class. - * @param subClass SubClass associated with this class. + * @param sqlState SQLSTATE associated with this condition. + * @param subCondition Sub-condition associated with this condition. * @param message Message format with optional placeholders (e.g. <parm>). * The error message is constructed by concatenating the lines with newlines. */ private case class ErrorInfo( message: Seq[String], - subClass: Option[Map[String, ErrorSubInfo]], + subCondition: Option[Map[String, ErrorSubInfo]], sqlState: Option[String]) { // For compatibility with multi-line error messages @JsonIgnore @@ -146,7 +146,7 @@ private case class ErrorInfo( } /** - * Information associated with an error subclass. + * Information associated with an error sub-condition. * * @param message Message format with optional placeholders (e.g. <parm>). * The error message is constructed by concatenating the lines with newlines. diff --git a/common/utils/src/main/scala/org/apache/spark/SparkThrowableHelper.scala b/common/utils/src/main/scala/org/apache/spark/SparkThrowableHelper.scala index db5eff72e124a..d0d3ac32dc687 100644 --- a/common/utils/src/main/scala/org/apache/spark/SparkThrowableHelper.scala +++ b/common/utils/src/main/scala/org/apache/spark/SparkThrowableHelper.scala @@ -31,7 +31,7 @@ private[spark] object ErrorMessageFormat extends Enumeration { * construct error messages. */ private[spark] object SparkThrowableHelper { - val errorReader = new ErrorClassesJsonReader( + val errorReader = new ErrorConditionsJsonReader( // Note that though we call them "error classes" here, the proper name is "error conditions", // hence why the name of the JSON file is different. We will address this inconsistency as part // of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 diff --git a/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json b/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json index a7b22e1370fd8..48feff269804e 100644 --- a/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json +++ b/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json @@ -31,7 +31,7 @@ "If you don't want your streaming query to fail on such cases, set the source option failOnDataLoss to false.", "Reason:" ], - "subClass" : { + "subCondition" : { "ADDED_PARTITION_DOES_NOT_START_FROM_OFFSET_ZERO" : { "message" : [ "Added partition starts from instead of 0." 
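The Kafka module below is updated to consume the renamed reader; as a minimal sketch of that pattern (the resource path and condition name here are placeholders, not real entries):

    import org.apache.spark.ErrorConditionsJsonReader

    // Hypothetical JSON file; real callers point at e.g. error/kafka-error-conditions.json.
    val reader = new ErrorConditionsJsonReader(
      Seq(getClass.getClassLoader.getResource("error/my-error-conditions.json")))

    // Looks up the message template for the condition and substitutes its <param> placeholders.
    val message = reader.getErrorMessage("MY_CONDITION", Map("param" -> "value"))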
diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala index 8dc4e543060d3..64d321a3605fa 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala @@ -21,14 +21,11 @@ import scala.jdk.CollectionConverters._ import org.apache.kafka.common.TopicPartition -import org.apache.spark.{ErrorClassesJsonReader, SparkException, SparkThrowable} +import org.apache.spark.{ErrorConditionsJsonReader, SparkException, SparkThrowable} private object KafkaExceptionsHelper { - val errorClassesJsonReader: ErrorClassesJsonReader = - new ErrorClassesJsonReader( - // Note that though we call them "error classes" here, the proper name is "error conditions", - // hence why the name of the JSON file is different. We will address this inconsistency as - // part of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 + val errorConditionsJsonReader: ErrorConditionsJsonReader = + new ErrorConditionsJsonReader( Seq(getClass.getClassLoader.getResource("error/kafka-error-conditions.json"))) } @@ -36,7 +33,7 @@ object KafkaExceptions { def mismatchedTopicPartitionsBetweenEndOffsetAndPrefetched( tpsForPrefetched: Set[TopicPartition], tpsForEndOffset: Set[TopicPartition]): SparkException = { - val errMsg = KafkaExceptionsHelper.errorClassesJsonReader.getErrorMessage( + val errMsg = KafkaExceptionsHelper.errorConditionsJsonReader.getErrorMessage( "MISMATCHED_TOPIC_PARTITIONS_BETWEEN_END_OFFSET_AND_PREFETCHED", Map( "tpsForPrefetched" -> tpsForPrefetched.toString(), @@ -49,7 +46,7 @@ object KafkaExceptions { def endOffsetHasGreaterOffsetForTopicPartitionThanPrefetched( prefetchedOffset: Map[TopicPartition, Long], endOffset: Map[TopicPartition, Long]): SparkException = { - val errMsg = KafkaExceptionsHelper.errorClassesJsonReader.getErrorMessage( + val errMsg = KafkaExceptionsHelper.errorConditionsJsonReader.getErrorMessage( "END_OFFSET_HAS_GREATER_OFFSET_FOR_TOPIC_PARTITION_THAN_PREFETCHED", Map( "prefetchedOffset" -> prefetchedOffset.toString(), @@ -62,7 +59,7 @@ object KafkaExceptions { def lostTopicPartitionsInEndOffsetWithTriggerAvailableNow( tpsForLatestOffset: Set[TopicPartition], tpsForEndOffset: Set[TopicPartition]): SparkException = { - val errMsg = KafkaExceptionsHelper.errorClassesJsonReader.getErrorMessage( + val errMsg = KafkaExceptionsHelper.errorConditionsJsonReader.getErrorMessage( "LOST_TOPIC_PARTITIONS_IN_END_OFFSET_WITH_TRIGGER_AVAILABLENOW", Map( "tpsForLatestOffset" -> tpsForLatestOffset.toString(), @@ -75,7 +72,7 @@ object KafkaExceptions { def endOffsetHasGreaterOffsetForTopicPartitionThanLatestWithTriggerAvailableNow( latestOffset: Map[TopicPartition, Long], endOffset: Map[TopicPartition, Long]): SparkException = { - val errMsg = KafkaExceptionsHelper.errorClassesJsonReader.getErrorMessage( + val errMsg = KafkaExceptionsHelper.errorConditionsJsonReader.getErrorMessage( "END_OFFSET_HAS_GREATER_OFFSET_FOR_TOPIC_PARTITION_THAN_LATEST_WITH_TRIGGER_AVAILABLENOW", Map( "latestOffset" -> latestOffset.toString(), @@ -92,7 +89,7 @@ object KafkaExceptions { groupId: String, cause: Throwable): KafkaIllegalStateException = { new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.COULD_NOT_READ_OFFSET_RANGE", + errorCondition = "KAFKA_DATA_LOSS.COULD_NOT_READ_OFFSET_RANGE", messageParameters = Map( "startOffset" 
-> startOffset.toString, "endOffset" -> endOffset.toString, @@ -106,7 +103,7 @@ object KafkaExceptions { offset: Long, fetchedOffset: Long): KafkaIllegalStateException = { new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.START_OFFSET_RESET", + errorCondition = "KAFKA_DATA_LOSS.START_OFFSET_RESET", messageParameters = Map( "topicPartition" -> topicPartition.toString, "offset" -> offset.toString, @@ -116,7 +113,7 @@ object KafkaExceptions { def initialOffsetNotFoundForPartitions( partitions: Set[TopicPartition]): KafkaIllegalStateException = { new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.INITIAL_OFFSET_NOT_FOUND_FOR_PARTITIONS", + errorCondition = "KAFKA_DATA_LOSS.INITIAL_OFFSET_NOT_FOUND_FOR_PARTITIONS", messageParameters = Map("partitions" -> partitions.toString)) } @@ -124,7 +121,7 @@ object KafkaExceptions { topicPartition: TopicPartition, startOffset: Long): KafkaIllegalStateException = { new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.ADDED_PARTITION_DOES_NOT_START_FROM_OFFSET_ZERO", + errorCondition = "KAFKA_DATA_LOSS.ADDED_PARTITION_DOES_NOT_START_FROM_OFFSET_ZERO", messageParameters = Map("topicPartition" -> topicPartition.toString, "startOffset" -> startOffset.toString)) } @@ -135,11 +132,11 @@ object KafkaExceptions { groupIdConfigName match { case Some(config) => new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.PARTITIONS_DELETED_AND_GROUP_ID_CONFIG_PRESENT", + errorCondition = "KAFKA_DATA_LOSS.PARTITIONS_DELETED_AND_GROUP_ID_CONFIG_PRESENT", messageParameters = Map("partitions" -> partitions.toString, "groupIdConfig" -> config)) case None => new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.PARTITIONS_DELETED", + errorCondition = "KAFKA_DATA_LOSS.PARTITIONS_DELETED", messageParameters = Map("partitions" -> partitions.toString)) } } @@ -149,7 +146,7 @@ object KafkaExceptions { prevOffset: Long, newOffset: Long): KafkaIllegalStateException = { new KafkaIllegalStateException( - errorClass = "KAFKA_DATA_LOSS.PARTITION_OFFSET_CHANGED", + errorCondition = "KAFKA_DATA_LOSS.PARTITION_OFFSET_CHANGED", messageParameters = Map( "topicPartition" -> topicPartition.toString, "prevOffset" -> prevOffset.toString, @@ -161,18 +158,18 @@ object KafkaExceptions { * Illegal state exception thrown with an error class. 
*/ private[kafka010] class KafkaIllegalStateException( - errorClass: String, + errorCondition: String, messageParameters: Map[String, String], cause: Throwable = null) extends IllegalStateException( - KafkaExceptionsHelper.errorClassesJsonReader.getErrorMessage( - errorClass, messageParameters), cause) + KafkaExceptionsHelper.errorConditionsJsonReader.getErrorMessage( + errorCondition, messageParameters), cause) with SparkThrowable { override def getSqlState: String = - KafkaExceptionsHelper.errorClassesJsonReader.getSqlState(errorClass) + KafkaExceptionsHelper.errorConditionsJsonReader.getSqlState(errorCondition) override def getMessageParameters: java.util.Map[String, String] = messageParameters.asJava - override def getErrorClass: String = errorClass + override def getErrorClass: String = errorCondition } diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 231cfdc3f32fc..ecc0a5333c7e0 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -46,8 +46,6 @@ class SparkThrowableSuite extends SparkFunSuite { "core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\"" }}} */ - private val regenerateCommand = "SPARK_GENERATE_GOLDEN_FILES=1 build/sbt " + - "\"core/testOnly *SparkThrowableSuite -- -t \\\"Error classes match with document\\\"\"" private val errorJsonFilePath = getWorkspaceFilePath( // Note that though we call them "error classes" here, the proper name is "error conditions", @@ -55,7 +53,7 @@ class SparkThrowableSuite extends SparkFunSuite { // of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 "common", "utils", "src", "main", "resources", "error", "error-conditions.json") - private val errorReader = new ErrorClassesJsonReader(Seq(errorJsonFilePath.toUri.toURL)) + private val errorReader = new ErrorConditionsJsonReader(Seq(errorJsonFilePath.toUri.toURL)) override def beforeAll(): Unit = { super.beforeAll() @@ -143,7 +141,7 @@ class SparkThrowableSuite extends SparkFunSuite { test("Message invariants") { val messageSeq = errorReader.errorInfoMap.values.toSeq.flatMap { i => - Seq(i.message) ++ i.subClass.getOrElse(Map.empty).values.toSeq.map(_.message) + Seq(i.message) ++ i.subCondition.getOrElse(Map.empty).values.toSeq.map(_.message) } messageSeq.foreach { message => message.foreach { msg => @@ -180,7 +178,7 @@ class SparkThrowableSuite extends SparkFunSuite { val allowedChars = "[A-Z0-9_]*" errorReader.errorInfoMap.foreach { e => assert(e._1.matches(allowedChars), s"Error class: ${e._1} is invalid") - e._2.subClass.map { s => + e._2.subCondition.map { s => s.keys.foreach { k => assert(k.matches(allowedChars), s"Error sub-class: $k is invalid") } @@ -353,7 +351,7 @@ class SparkThrowableSuite extends SparkFunSuite { | } ] |}""".stripMargin) // scalastyle:on line.size.limit - // STANDARD w/ errorSubClass but w/o queryContext + // STANDARD w/ errorSubCondition but w/o queryContext val e2 = new SparkIllegalArgumentException( errorClass = "UNSUPPORTED_SAVE_MODE.EXISTENT_PATH", messageParameters = Map("saveMode" -> "UNSUPPORTED_MODE")) @@ -442,7 +440,8 @@ class SparkThrowableSuite extends SparkFunSuite { | } |} |""".stripMargin, StandardCharsets.UTF_8) - val reader = new ErrorClassesJsonReader(Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) + val reader = new ErrorConditionsJsonReader( + Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) 
assert(reader.getErrorMessage("DIVIDE_BY_ZERO", Map.empty) == "abc") } } @@ -461,7 +460,7 @@ class SparkThrowableSuite extends SparkFunSuite { |} |""".stripMargin, StandardCharsets.UTF_8) val e = intercept[SparkException] { - new ErrorClassesJsonReader(Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) + new ErrorConditionsJsonReader(Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) } assert(e.getErrorClass === "INTERNAL_ERROR") assert(e.getMessage.contains("DIVIDE.BY_ZERO")) @@ -476,7 +475,7 @@ class SparkThrowableSuite extends SparkFunSuite { | "message" : [ | "abc" | ], - | "subClass" : { + | "subCondition" : { | "BY.ZERO" : { | "message" : [ | "def" @@ -487,7 +486,7 @@ class SparkThrowableSuite extends SparkFunSuite { |} |""".stripMargin, StandardCharsets.UTF_8) val e = intercept[SparkException] { - new ErrorClassesJsonReader(Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) + new ErrorConditionsJsonReader(Seq(errorJsonFilePath.toUri.toURL, json.toURI.toURL)) } assert(e.getErrorClass === "INTERNAL_ERROR") assert(e.getMessage.contains("BY.ZERO")) diff --git a/dev/pyproject.toml b/dev/pyproject.toml index 4f462d14c7838..48fe8f51a2577 100644 --- a/dev/pyproject.toml +++ b/dev/pyproject.toml @@ -31,4 +31,4 @@ required-version = "23.9.1" line-length = 100 target-version = ['py38'] include = '\.pyi?$' -extend-exclude = 'cloudpickle|error_classes.py' +extend-exclude = 'cloudpickle' diff --git a/dev/tox.ini b/dev/tox.ini index c5905d12a80a1..5ea22eedda5c3 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -27,8 +27,6 @@ ignore = per-file-ignores = # E501 is ignored as shared.py is auto-generated. python/pyspark/ml/param/shared.py: E501, - # E501 is ignored as we should keep the json string format in error_classes.py. - python/pyspark/errors/error_classes.py: E501, # Examples contain some unused variables. examples/src/main/python/sql/datasource.py: F841, # Exclude * imports in test files diff --git a/docs/util/build-error-docs.py b/docs/util/build-error-docs.py index df6b9e3c05270..fda42fed2589b 100644 --- a/docs/util/build-error-docs.py +++ b/docs/util/build-error-docs.py @@ -33,10 +33,10 @@ def load_error_conditions(path): for name, details in raw_error_conditions.items(): if name.startswith("_LEGACY_ERROR") or name.startswith("INTERNAL_ERROR"): continue - if "subClass" in details: - for sub_name in details["subClass"]: - details["subClass"][sub_name]["message"] = ( - assemble_message(details["subClass"][sub_name]["message"]) + if "subCondition" in details: + for sub_name in details["subCondition"]: + details["subCondition"][sub_name]["message"] = ( + assemble_message(details["subCondition"][sub_name]["message"]) ) details["message"] = assemble_message(details["message"]) error_conditions[name] = details @@ -84,8 +84,8 @@ def generate_doc_rows(condition_name, condition_details): ) ] sub_condition_rows = [] - if "subClass" in condition_details: - for sub_condition_name in sorted(condition_details["subClass"]): + if "subCondition" in condition_details: + for sub_condition_name in sorted(condition_details["subCondition"]): sub_condition_rows.append( """ @@ -105,7 +105,7 @@ def generate_doc_rows(condition_name, condition_details): anchor=anchor_name(condition_name, sub_condition_name), # See comment above for explanation of ``. 
sub_condition_name=sub_condition_name.replace("_", "_"), - message=condition_details["subClass"][sub_condition_name]["message"], + message=condition_details["subCondition"][sub_condition_name]["message"], ) ) doc_rows = condition_row + sub_condition_rows diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index d7e87c4de390e..e6ce12e9f71d5 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -240,19 +240,19 @@ Contributing Error and Exception .. currentmodule:: pyspark.errors -To throw a standardized user-facing error or exception, developers should specify the error class and message parameters rather than an arbitrary error message. +To throw a standardized user-facing error or exception, developers should specify the error condition and message parameters rather than an arbitrary error message. Usage ~~~~~ -1. Check if an appropriate error class already exists in `Error classes in PySpark `_. - If true, use the error class and skip to step 3. -2. Add a new class to `error-conditions.json `_; keep in mind the invariants below. +1. Check if an appropriate error condition already exists in `Error conditions in PySpark `_. + If true, use the error condition and skip to step 3. +2. Add a new condition to `error-conditions.json `_; keep in mind the invariants below. 3. Check if the exception type already extends `PySparkException`. If true, skip to step 5. 4. Mix `PySparkException` into the exception. -5. Throw the exception with the error class and message parameters. +5. Throw the exception with the error condition and message parameters. **Before** @@ -279,13 +279,13 @@ Throw with arbitrary error message: .. code-block:: python class PySparkTestError(PySparkException): - def __init__(self, error_class: str, message_parameters: Dict[str, str]): - super().__init__(error_class=error_class, message_parameters=message_parameters) + def __init__(self, error_condition: str, message_parameters: Dict[str, str]): + super().__init__(error_condition=error_condition, message_parameters=message_parameters) def getMessageParameters(self) -> Optional[Dict[str, str]]: return super().getMessageParameters() -Throw with error class and message parameters: +Throw with error condition and message parameters: .. code-block:: python @@ -295,25 +295,25 @@ Throw with error class and message parameters: Access fields ~~~~~~~~~~~~~ -To access error fields, catch exceptions that extend :class:`PySparkException` and access to error class with :func:`PySparkException.getErrorClass`. +To access error fields, catch exceptions that extend :class:`PySparkException` and access to error condition with :func:`PySparkException.getErrorCondition`. .. code-block:: python try: ... except PySparkException as pe: - if pe.getErrorClass() == "PROBLEM_BECAUSE": + if pe.getErrorCondition() == "PROBLEM_BECAUSE": ... Fields ~~~~~~ -**Error class** +**Error condition** -Error classes are a succinct, human-readable representation of the error category. +Error conditions are a succinct, human-readable representation of the error category. -An uncategorized errors can be assigned to a legacy error class with the prefix `_LEGACY_ERROR_TEMP_` and an unused sequential number, for instance `_LEGACY_ERROR_TEMP_0053`. +An uncategorized error can be assigned to a legacy error condition with the prefix `_LEGACY_ERROR_TEMP_` and an unused sequential number, for instance `_LEGACY_ERROR_TEMP_0053`. 
Invariants: diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_conditions.py similarity index 61% rename from python/pyspark/errors/error_classes.py rename to python/pyspark/errors/error_conditions.py index 30869a3fbb2d2..b5608fc6d9940 100644 --- a/python/pyspark/errors/error_classes.py +++ b/python/pyspark/errors/error_conditions.py @@ -18,14 +18,7 @@ import json import importlib.resources -# Note: Though we call them "error classes" here, the proper name is "error conditions", -# hence why the name of the JSON file is different. -# For more information, please see: https://issues.apache.org/jira/browse/SPARK-46810 -# This discrepancy will be resolved as part of: https://issues.apache.org/jira/browse/SPARK-47429 -ERROR_CLASSES_JSON = ( - importlib.resources - .files("pyspark.errors") - .joinpath("error-conditions.json") - .read_text() +ERROR_CONDITIONS_JSON = ( + importlib.resources.files("pyspark.errors").joinpath("error-conditions.json").read_text() ) -ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON) +ERROR_CONDITIONS_MAP = json.loads(ERROR_CONDITIONS_JSON) diff --git a/python/pyspark/errors/exceptions/__init__.py b/python/pyspark/errors/exceptions/__init__.py index c66f35958f8dd..78e24c3bf5d1a 100644 --- a/python/pyspark/errors/exceptions/__init__.py +++ b/python/pyspark/errors/exceptions/__init__.py @@ -19,13 +19,13 @@ def _write_self() -> None: import json from pathlib import Path - from pyspark.errors import error_classes + from pyspark.errors import error_conditions ERRORS_DIR = Path(__file__).parents[1] with open(ERRORS_DIR / "error-conditions.json", "w") as f: json.dump( - error_classes.ERROR_CLASSES_MAP, + error_conditions.ERROR_CONDITIONS_MAP, f, sort_keys=True, indent=2, diff --git a/python/pyspark/errors/exceptions/base.py b/python/pyspark/errors/exceptions/base.py index dcfc6df77a77a..e5670e7880013 100644 --- a/python/pyspark/errors/exceptions/base.py +++ b/python/pyspark/errors/exceptions/base.py @@ -14,11 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import warnings from abc import ABC, abstractmethod from enum import Enum from typing import Dict, Optional, cast, Iterable, TYPE_CHECKING, List -from pyspark.errors.utils import ErrorClassesReader +from pyspark.errors.utils import ErrorConditionsReader from pickle import PicklingError if TYPE_CHECKING: @@ -34,27 +35,37 @@ def __init__( self, message: Optional[str] = None, error_class: Optional[str] = None, + error_condition: Optional[str] = None, message_parameters: Optional[Dict[str, str]] = None, query_contexts: Optional[List["QueryContext"]] = None, ): + if error_class and error_condition: + raise PySparkValueError( + "Can't provide both `error_class` and `error_condition`. `error_class` is " + "deprecated. Use `error_condition` instead." 
+ ) + if error_class: + warnings.warn("Use `error_condition` instead.", DeprecationWarning) + error_condition = error_class + if query_contexts is None: query_contexts = [] - self._error_reader = ErrorClassesReader() + self._error_reader = ErrorConditionsReader() if message is None: self._message = self._error_reader.get_error_message( - cast(str, error_class), cast(Dict[str, str], message_parameters) + cast(str, error_condition), cast(Dict[str, str], message_parameters) ) else: self._message = message - self._error_class = error_class + self._error_condition = error_condition self._message_parameters = message_parameters self._query_contexts = query_contexts def getErrorClass(self) -> Optional[str]: """ - Returns an error class as a string. + Returns an error condition as a string. .. versionadded:: 3.4.0 @@ -65,11 +76,27 @@ def getErrorClass(self) -> Optional[str]: :meth:`PySparkException.getQueryContext` :meth:`PySparkException.getSqlState` """ - return self._error_class + warnings.warn("Use `getErrorCondition` instead.", DeprecationWarning) + return self._error_condition + + def getErrorCondition(self) -> Optional[str]: + """ + Returns an error condition as a string. + + .. versionadded:: 4.0.0 + + See Also + -------- + :meth:`PySparkException.getMessage` + :meth:`PySparkException.getMessageParameters` + :meth:`PySparkException.getQueryContext` + :meth:`PySparkException.getSqlState` + """ + return self._error_condition def getMessageParameters(self) -> Optional[Dict[str, str]]: """ - Returns a message parameters as a dictionary. + Returns message parameters as a dictionary. .. versionadded:: 3.4.0 @@ -168,31 +195,31 @@ class IllegalArgumentException(PySparkException): class ArithmeticException(PySparkException): """ - Arithmetic exception thrown from Spark with an error class. + Arithmetic exception thrown from Spark with an error condition. """ class UnsupportedOperationException(PySparkException): """ - Unsupported operation exception thrown from Spark with an error class. + Unsupported operation exception thrown from Spark with an error condition. """ class ArrayIndexOutOfBoundsException(PySparkException): """ - Array index out of bounds exception thrown from Spark with an error class. + Array index out of bounds exception thrown from Spark with an error condition. """ class DateTimeException(PySparkException): """ - Datetime exception thrown from Spark with an error class. + Datetime exception thrown from Spark with an error condition. """ class NumberFormatException(IllegalArgumentException): """ - Number format exception thrown from Spark with an error class. + Number format exception thrown from Spark with an error condition. """ @@ -216,7 +243,7 @@ class PythonException(PySparkException): class SparkRuntimeException(PySparkException): """ - Runtime exception thrown from Spark with an error class. + Runtime exception thrown from Spark with an error condition. """ @@ -240,59 +267,71 @@ class UnknownException(PySparkException): class PySparkValueError(PySparkException, ValueError): """ - Wrapper class for ValueError to support error classes. + Wrapper class for ValueError to support error conditions. """ class PySparkTypeError(PySparkException, TypeError): """ - Wrapper class for TypeError to support error classes. + Wrapper class for TypeError to support error conditions. """ class PySparkIndexError(PySparkException, IndexError): """ - Wrapper class for IndexError to support error classes. + Wrapper class for IndexError to support error conditions. 
""" class PySparkAttributeError(PySparkException, AttributeError): """ - Wrapper class for AttributeError to support error classes. + Wrapper class for AttributeError to support error conditions. """ class PySparkRuntimeError(PySparkException, RuntimeError): """ - Wrapper class for RuntimeError to support error classes. + Wrapper class for RuntimeError to support error conditions. """ class PySparkAssertionError(PySparkException, AssertionError): """ - Wrapper class for AssertionError to support error classes. + Wrapper class for AssertionError to support error conditions. """ def __init__( self, message: Optional[str] = None, error_class: Optional[str] = None, + error_condition: Optional[str] = None, message_parameters: Optional[Dict[str, str]] = None, data: Optional[Iterable["Row"]] = None, ): - super().__init__(message, error_class, message_parameters) + if error_class and error_condition: + raise PySparkValueError( + "Can't provide both `error_class` and `error_condition`. Use `error_condition`." + ) + if error_class: + warnings.warn("Use `error_condition` instead.", DeprecationWarning) + error_condition = error_class + super().__init__( + message, + error_condition=error_condition, + message_parameters=message_parameters, + ) self.data = data class PySparkNotImplementedError(PySparkException, NotImplementedError): """ - Wrapper class for NotImplementedError to support error classes. + Wrapper class for NotImplementedError to support error conditions. """ class PySparkPicklingError(PySparkException, PicklingError): """ - Wrapper class for pickle.PicklingError to support error classes. + Wrapper class for pickle.PicklingError to support error conditions. """ @@ -305,13 +344,13 @@ class RetriesExceeded(PySparkException): class PySparkKeyError(PySparkException, KeyError): """ - Wrapper class for KeyError to support error classes. + Wrapper class for KeyError to support error conditions. """ class PySparkImportError(PySparkException, ImportError): """ - Wrapper class for ImportError to support error classes. + Wrapper class for ImportError to support error conditions. """ diff --git a/python/pyspark/errors/tests/test_errors.py b/python/pyspark/errors/tests/test_errors.py index d9a8cf45bceda..ac78457e507ba 100644 --- a/python/pyspark/errors/tests/test_errors.py +++ b/python/pyspark/errors/tests/test_errors.py @@ -20,38 +20,45 @@ import unittest from pyspark.errors import PySparkValueError -from pyspark.errors.error_classes import ERROR_CLASSES_JSON -from pyspark.errors.utils import ErrorClassesReader +from pyspark.errors.error_conditions import ERROR_CONDITIONS_JSON +from pyspark.errors.utils import ErrorConditionsReader class ErrorsTest(unittest.TestCase): - def test_error_classes_sorted(self): - # Test error classes is sorted alphabetically - error_reader = ErrorClassesReader() - error_class_names = list(error_reader.error_info_map.keys()) - for i in range(len(error_class_names) - 1): + def test_error_conditions_sorted(self): + """ + Make sure the error conditions are sorted alphabetically. + """ + error_reader = ErrorConditionsReader() + error_condition_names = list(error_reader.error_info_map.keys()) + for i in range(len(error_condition_names) - 1): self.assertTrue( - error_class_names[i] < error_class_names[i + 1], - f"Error class [{error_class_names[i]}] should place " - f"after [{error_class_names[i + 1]}]." + error_condition_names[i] < error_condition_names[i + 1], + f"Error condition [{error_condition_names[i]}] should place " + f"after [{error_condition_names[i + 1]}]." 
"\n\nRun 'cd $SPARK_HOME; bin/pyspark' and " "'from pyspark.errors.exceptions import _write_self; _write_self()' " "to automatically sort them.", ) - def test_error_classes_duplicated(self): - # Test error classes is not duplicated + def test_error_conditions_duplicated(self): + """ + Make sure no error condition is duplicated. + """ + def detect_duplication(pairs): - error_classes_json = {} + error_conditions_json = {} for name, message in pairs: - self.assertTrue(name not in error_classes_json, f"Duplicate error class: {name}") - error_classes_json[name] = message - return error_classes_json + self.assertTrue( + name not in error_conditions_json, f"Duplicate error condition: {name}" + ) + error_conditions_json[name] = message + return error_conditions_json - json.loads(ERROR_CLASSES_JSON, object_pairs_hook=detect_duplication) + json.loads(ERROR_CONDITIONS_JSON, object_pairs_hook=detect_duplication) - def test_invalid_error_class(self): - with self.assertRaisesRegex(ValueError, "Cannot find main error class"): + def test_invalid_error_condition(self): + with self.assertRaisesRegex(ValueError, "Cannot find main error condition"): PySparkValueError(error_class="invalid", message_parameters={}) diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py index cddec3319964e..b9f1d51478387 100644 --- a/python/pyspark/errors/utils.py +++ b/python/pyspark/errors/utils.py @@ -20,7 +20,7 @@ import inspect import os from typing import Any, Callable, Dict, Match, TypeVar, Type, TYPE_CHECKING -from pyspark.errors.error_classes import ERROR_CLASSES_MAP +from pyspark.errors.error_conditions import ERROR_CONDITIONS_MAP if TYPE_CHECKING: @@ -30,23 +30,23 @@ T = TypeVar("T") -class ErrorClassesReader: +class ErrorConditionsReader: """ A reader to load error information from error-conditions.json. """ def __init__(self) -> None: - self.error_info_map = ERROR_CLASSES_MAP + self.error_info_map = ERROR_CONDITIONS_MAP - def get_error_message(self, error_class: str, message_parameters: Dict[str, str]) -> str: + def get_error_message(self, error_condition: str, message_parameters: Dict[str, str]) -> str: """ Returns the completed error message by applying message parameters to the message template. """ - message_template = self.get_message_template(error_class) + message_template = self.get_message_template(error_condition) # Verify message parameters. message_parameters_from_template = re.findall("<([a-zA-Z0-9_-]+)>", message_template) assert set(message_parameters_from_template) == set(message_parameters), ( - f"Undefined error message parameter for error class: {error_class}. " + f"Undefined error message parameter for error condition: {error_condition}. " f"Parameters: {message_parameters}" ) @@ -58,36 +58,38 @@ def replace_match(match: Match[str]) -> str: return message_template.format(**message_parameters) - def get_message_template(self, error_class: str) -> str: + def get_message_template(self, error_condition: str) -> str: """ - Returns the message template for corresponding error class from error-conditions.json. + Returns the message template for the corresponding error condition from + error-conditions.json. - For example, - when given `error_class` is "EXAMPLE_ERROR_CLASS", - and corresponding error class in error-conditions.json looks like the below: + For example, say `error_condition` is "EXAMPLE_ERROR_CONDITION", and the corresponding + error condition in error-conditions.json looks like this: .. 
code-block:: python - "EXAMPLE_ERROR_CLASS" : { + "EXAMPLE_ERROR_CONDITION" : { "message" : [ "Problem because of ." ] } In this case, this function returns: - "Problem because of ." - For sub error class, when given `error_class` is "EXAMPLE_ERROR_CLASS.SUB_ERROR_CLASS", - and corresponding error class in error-conditions.json looks like the below: + "Problem because of ." + + For an error sub-condition, say `error_condition` is + "EXAMPLE_ERROR_CONDITION.ERROR_SUB_CONDITION", and the corresponding error condition in + error-conditions.json looks like this: .. code-block:: python - "EXAMPLE_ERROR_CLASS" : { + "EXAMPLE_ERROR_CONDITION" : { "message" : [ "Problem because of ." ], - "sub_class" : { - "SUB_ERROR_CLASS" : { + "sub_condition" : { + "ERROR_SUB_CONDITION" : { "message" : [ "Do to fix the problem." ] @@ -96,35 +98,40 @@ def get_message_template(self, error_class: str) -> str: } In this case, this function returns: - "Problem because . Do to fix the problem." + + "Problem because . Do to fix the problem." """ - error_classes = error_class.split(".") - len_error_classes = len(error_classes) - assert len_error_classes in (1, 2) - - # Generate message template for main error class. - main_error_class = error_classes[0] - if main_error_class in self.error_info_map: - main_error_class_info_map = self.error_info_map[main_error_class] + error_conditions = error_condition.split(".") + len_error_conditions = len(error_conditions) + assert len_error_conditions in (1, 2) + + # Generate message template for main error condition. + main_error_condition = error_conditions[0] + if main_error_condition in self.error_info_map: + main_error_condition_info_map = self.error_info_map[main_error_condition] else: - raise ValueError(f"Cannot find main error class '{main_error_class}'") + raise ValueError(f"Cannot find main error condition '{main_error_condition}'") - main_message_template = "\n".join(main_error_class_info_map["message"]) + main_message_template = "\n".join(main_error_condition_info_map["message"]) - has_sub_class = len_error_classes == 2 + has_sub_condition = len_error_conditions == 2 - if not has_sub_class: + if not has_sub_condition: message_template = main_message_template else: - # Generate message template for sub error class if exists. - sub_error_class = error_classes[1] - main_error_class_subclass_info_map = main_error_class_info_map["sub_class"] - if sub_error_class in main_error_class_subclass_info_map: - sub_error_class_info_map = main_error_class_subclass_info_map[sub_error_class] + # Generate message template for error sub-condition if exists. 
+ error_sub_condition = error_conditions[1] + main_error_condition_subcondition_info_map = main_error_condition_info_map[ + "sub_condition" + ] + if error_sub_condition in main_error_condition_subcondition_info_map: + error_sub_condition_info_map = main_error_condition_subcondition_info_map[ + error_sub_condition + ] else: - raise ValueError(f"Cannot find sub error class '{sub_error_class}'") + raise ValueError(f"Cannot find error sub-condition '{error_sub_condition}'") - sub_message_template = "\n".join(sub_error_class_info_map["message"]) + sub_message_template = "\n".join(error_sub_condition_info_map["message"]) message_template = main_message_template + " " + sub_message_template return message_template diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py index e1bd94dcec4d0..32754c10c4d21 100644 --- a/python/pyspark/errors_doc_gen.py +++ b/python/pyspark/errors_doc_gen.py @@ -1,15 +1,15 @@ import re -from pyspark.errors.error_classes import ERROR_CLASSES_MAP +from pyspark.errors.error_conditions import ERROR_CONDITIONS_MAP def generate_errors_doc(output_rst_file_path: str) -> None: """ - Generates a reStructuredText (RST) documentation file for PySpark error classes. + Generates a reStructuredText (RST) documentation file for PySpark error conditions. - This function fetches error classes defined in `pyspark.errors.error_classes` + This function fetches error conditions defined in `pyspark.errors.error_conditions` and writes them into an RST file. The generated RST file provides an overview - of common, named error classes returned by PySpark. + of common, named error conditions returned by PySpark. Parameters ---------- @@ -37,19 +37,22 @@ def generate_errors_doc(output_rst_file_path: str) -> None: specific language governing permissions and limitations under the License. -======================== -Error classes in PySpark -======================== +=========================== +Error conditions in PySpark +=========================== -This is a list of common, named error classes returned by PySpark which are defined at `error-conditions.json `_. +This is a list of common, named error conditions returned by PySpark which are defined at +`error-conditions.json `_. -When writing PySpark errors, developers must use an error class from the list. If an appropriate error class is not available, add a new one into the list. For more information, please refer to `Contributing Error and Exception `_. +When writing PySpark errors, developers must use an error condition from the list. If an appropriate +error condition is not available, add a new one into the list. For more information, please refer to +`Contributing Error and Exception `_. """ # noqa with open(output_rst_file_path, "w") as f: f.write(header + "\n\n") - for error_key, error_details in ERROR_CLASSES_MAP.items(): + for error_key, error_details in ERROR_CONDITIONS_MAP.items(): f.write(error_key + "\n") - # The length of the error class name and underline must be the same + # The length of the error condition name and underline must be the same # to satisfy the RST format. f.write("-" * len(error_key) + "\n\n") messages = error_details["message"] @@ -57,6 +60,6 @@ def generate_errors_doc(output_rst_file_path: str) -> None: # Escape parentheses with a backslash when they follow a backtick. 
message = re.sub(r"`(\()", r"`\\\1", message) f.write(message + "\n") - # Add 2 new lines between the descriptions of each error class + # Add 2 new lines between the descriptions of each error condition # to improve the readability of the generated RST file. f.write("\n\n")