From a1387bfab0ed6a1e59b05b451992726b1afa2d39 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 19 Jun 2023 11:48:59 +0800 Subject: [PATCH 1/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- .../org/apache/spark/sql/functions.scala | 132 +++++++++ .../spark/sql/PlanGenerationTestSuite.scala | 52 ++++ .../explain-results/function_try_add.explain | 2 + .../explain-results/function_try_avg.explain | 2 + .../function_try_divide.explain | 2 + .../function_try_element_at_array.explain | 2 + .../function_try_element_at_map.explain | 2 + .../function_try_multiply.explain | 2 + .../function_try_subtract.explain | 2 + .../explain-results/function_try_sum.explain | 2 + .../function_try_to_binary.explain | 2 + ...ction_try_to_binary_without_format.explain | 2 + .../function_try_to_number.explain | 2 + .../function_try_to_timestamp.explain | 2 + ...on_try_to_timestamp_without_format.explain | 2 + .../query-tests/queries/function_try_add.json | 29 ++ .../queries/function_try_add.proto.bin | Bin 0 -> 183 bytes .../query-tests/queries/function_try_avg.json | 25 ++ .../queries/function_try_avg.proto.bin | Bin 0 -> 176 bytes .../queries/function_try_divide.json | 29 ++ .../queries/function_try_divide.proto.bin | Bin 0 -> 186 bytes .../function_try_element_at_array.json | 29 ++ .../function_try_element_at_array.proto.bin | Bin 0 -> 190 bytes .../queries/function_try_element_at_map.json | 29 ++ .../function_try_element_at_map.proto.bin | Bin 0 -> 190 bytes .../queries/function_try_multiply.json | 29 ++ .../queries/function_try_multiply.proto.bin | Bin 0 -> 188 bytes .../queries/function_try_subtract.json | 29 ++ .../queries/function_try_subtract.proto.bin | Bin 0 -> 188 bytes .../query-tests/queries/function_try_sum.json | 25 ++ .../queries/function_try_sum.proto.bin | Bin 0 -> 176 bytes .../queries/function_try_to_binary.json | 29 ++ .../queries/function_try_to_binary.proto.bin | Bin 0 -> 194 bytes ...function_try_to_binary_without_format.json | 25 ++ ...ion_try_to_binary_without_format.proto.bin | Bin 0 -> 182 bytes .../queries/function_try_to_number.json | 29 ++ .../queries/function_try_to_number.proto.bin | Bin 0 -> 194 bytes .../queries/function_try_to_timestamp.json | 29 ++ .../function_try_to_timestamp.proto.bin | Bin 0 -> 192 bytes ...ction_try_to_timestamp_without_format.json | 25 ++ ..._try_to_timestamp_without_format.proto.bin | Bin 0 -> 185 bytes .../reference/pyspark.sql/functions.rst | 10 + python/pyspark/sql/connect/functions.py | 76 +++++ python/pyspark/sql/functions.py | 272 ++++++++++++++++++ .../org/apache/spark/sql/functions.scala | 154 ++++++++++ .../apache/spark/sql/DateFunctionsSuite.scala | 11 + .../apache/spark/sql/MathFunctionsSuite.scala | 50 ++++ .../spark/sql/StringFunctionsSuite.scala | 17 ++ 48 files changed, 1161 insertions(+) create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_add.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_avg.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_divide.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_array.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_map.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_multiply.explain create 
mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_subtract.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_sum.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary_without_format.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_number.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp_without_format.explain create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_add.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_divide.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_divide.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json create mode 100644 
connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json create mode 100644 connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index 9c2a5b961824a..fa8346590832b 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -1807,6 +1807,75 @@ object functions { */ def sqrt(colName: String): Column = sqrt(Column(colName)) + /** + * Returns the sum of `left` and `right` and the result is null on overflow. The acceptable + * input types are the same with the `+` operator. + * + * @note + * Only numeric types are supported in this function, while `try_add` in SQL supports Numeric, + * DATE, TIMESTAMP, and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_add(left: Column, right: Column): Column = Column.fn("try_add", left, right) + + /** + * Returns the mean calculated from values of a group and the result is null on overflow. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_avg(e: Column): Column = Column.fn("try_avg", e) + + /** + * Returns `dividend`/`divisor`. It always performs floating point division. Its result is + * always null if `divisor` is 0. + * + * @note + * In this function both `dividend` and `divisor` must be numeric, while in the SQL function + * `try_divide` the `dividend` can also be an interval. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_divide(left: Column, right: Column): Column = Column.fn("try_divide", left, right) + + /** + * Returns `left`*`right` and the result is null on overflow. The acceptable input types are the + * same with the `*` operator. + * + * @note + * Only numeric types are supported in this function, while `try_multiply` in SQL supports + * Numeric and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_multiply(left: Column, right: Column): Column = Column.fn("try_multiply", left, right) + + /** + * Returns `left`-`right` and the result is null on overflow. The acceptable input types are the + * same with the `-` operator. + * + * @note + * Only numeric types are supported in this function, while `try_subtract` in SQL supports + * Numeric, DATE, TIMESTAMP, and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_subtract(left: Column, right: Column): Column = Column.fn("try_subtract", left, right) + + /** + * Returns the sum calculated from values of a group and the result is null on overflow. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_sum(e: Column): Column = Column.fn("try_sum", e) + /** * Creates a new struct column. If the input column is a column in a `DataFrame`, or a derived * column expression that is named (i.e. aliased), its name would be retained as the @@ -3971,6 +4040,34 @@ object functions { def startswith(str: Column, prefix: Column): Column = Column.fn("startswith", str, prefix) + /** + * This is a special version of `to_binary` that performs the same operation, but returns a NULL + * value instead of raising an error if the conversion cannot be performed.
+ * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_binary(e: Column, format: Column): Column = Column.fn("try_to_binary", e, format) + + /** + * This is a special version of `to_binary` that performs the same operation, but returns a NULL + * value instead of raising an error if the conversion cannot be performed. + * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_binary(e: Column): Column = Column.fn("try_to_binary", e) + + /** + * Converts string `e` to a number based on the string format `format`. Returns NULL if the + * string `e` does not match the expected format. The format follows the same semantics as the + * `to_number` function. + * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_number(e: Column, format: Column): Column = Column.fn("try_to_number", e, format) + ////////////////////////////////////////////////////////////////////////////////////////////// // DateTime functions ////////////////////////////////////////////////////////////////////////////////////////////// @@ -4474,6 +4571,27 @@ object functions { */ def to_timestamp(s: Column, fmt: String): Column = Column.fn("to_timestamp", s, lit(fmt)) + /** + * Parses the `s` expression with the `format` to a timestamp. The function always returns null + * on an invalid input with/without ANSI SQL mode enabled. The result data type is consistent + * with the value of configuration `spark.sql.timestampType`. + * + * @group datetime_funcs + * @since 3.5.0 + */ + def try_to_timestamp(s: Column, format: Column): Column = + Column.fn("try_to_timestamp", s, format) + + /** + * Parses the `s` expression to a timestamp. The function always returns null on an invalid + * input with/without ANSI SQL mode enabled. It follows casting rules to a timestamp. The result + * data type is consistent with the value of configuration `spark.sql.timestampType`. + * + * @group datetime_funcs + * @since 3.5.0 + */ + def try_to_timestamp(s: Column): Column = Column.fn("try_to_timestamp", s) + /** * Converts the column into `DateType` by casting rules to `DateType`. * @@ -5034,6 +5152,20 @@ object functions { */ def element_at(column: Column, value: Any): Column = Column.fn("element_at", column, lit(value)) + /** + * (array, index) - Returns element of array at given (1-based) index. If index is 0, Spark will + * throw an error. If index < 0, accesses elements from the last to the first. The function + * always returns NULL if the index exceeds the length of the array. + * + * (map, key) - Returns value for given key. The function always returns NULL if the key is not + * contained in the map. + * + * @group map_funcs + * @since 3.5.0 + */ + def try_element_at(column: Column, value: Column): Column = + Column.fn("try_element_at", column, value) + /** * Returns element of array at given (0-based) index. If the index points outside of the array * boundaries, then this function returns NULL.
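For context on the semantics the Connect client surfaces here: each of these definitions builds an unresolved function call by name via `Column.fn`, so the null-on-failure behavior comes from the server-side Catalyst implementations. A minimal sketch of that behavior, assuming a running `SparkSession` named `spark` (the data is illustrative, not part of this patch):

import org.apache.spark.sql.functions._

// Each try_* call degrades to null instead of raising a runtime error:
val df = spark.range(1).select(
  try_add(lit(Int.MaxValue), lit(1)).as("add_overflow"),  // null: integer overflow
  try_divide(lit(1), lit(0)).as("div_by_zero"),           // null: division by zero
  try_to_number(lit("abc"), lit("999")).as("bad_number")) // null: format mismatch
df.show() // all three columns are null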
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 7633cd7d0c022..9abc27ef484eb 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -1213,6 +1213,58 @@ class PlanGenerationTestSuite fn.sqrt("b") } + functionTest("try_add") { + fn.try_add(fn.col("a"), fn.col("a")) + } + + functionTest("try_avg") { + fn.try_avg(fn.col("a")) + } + + functionTest("try_divide") { + fn.try_divide(fn.col("a"), fn.col("a")) + } + + functionTest("try_multiply") { + fn.try_multiply(fn.col("a"), fn.col("a")) + } + + functionTest("try_subtract") { + fn.try_subtract(fn.col("a"), fn.col("a")) + } + + functionTest("try_sum") { + fn.try_sum(fn.col("a")) + } + + functionTest("try_to_timestamp") { + fn.try_to_timestamp(fn.col("g"), fn.col("g")) + } + + functionTest("try_to_timestamp without format") { + fn.try_to_timestamp(fn.col("g")) + } + + functionTest("try_to_binary") { + fn.try_to_binary(fn.col("g"), lit("format")) + } + + functionTest("try_to_binary without format") { + fn.try_to_binary(fn.col("g")) + } + + functionTest("try_to_number") { + fn.try_to_number(fn.col("g"), lit("99,999")) + } + + functionTest("try_element_at array") { + fn.try_element_at(fn.col("e"), fn.col("a")) + } + + functionTest("try_element_at map") { + fn.try_element_at(fn.col("f"), fn.col("g")) + } + functionTest("struct") { fn.struct("a", "d") } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_add.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_add.explain new file mode 100644 index 0000000000000..af718833dbf21 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_add.explain @@ -0,0 +1,2 @@ +Project [(a#0 + a#0) AS try_add(a, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_avg.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_avg.explain new file mode 100644 index 0000000000000..84c7065fbed3b --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_avg.explain @@ -0,0 +1,2 @@ +Aggregate [try_avg(a#0) AS try_avg(a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_divide.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_divide.explain new file mode 100644 index 0000000000000..03f59e9b9577b --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_divide.explain @@ -0,0 +1,2 @@ +Project [(cast(a#0 as double) / cast(a#0 as double)) AS try_divide(a, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_array.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_array.explain new file mode 100644 index 0000000000000..20e67549c6d7f --- /dev/null +++ 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_array.explain @@ -0,0 +1,2 @@ +Project [element_at(e#0, a#0, None, false) AS try_element_at(e, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_map.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_map.explain new file mode 100644 index 0000000000000..aa2b736ada30d --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_element_at_map.explain @@ -0,0 +1,2 @@ +Project [element_at(f#0, g#0, None, false) AS try_element_at(f, g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_multiply.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_multiply.explain new file mode 100644 index 0000000000000..855ecec2ca0be --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_multiply.explain @@ -0,0 +1,2 @@ +Project [(a#0 * a#0) AS try_multiply(a, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_subtract.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_subtract.explain new file mode 100644 index 0000000000000..4422fd91be7c7 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_subtract.explain @@ -0,0 +1,2 @@ +Project [(a#0 - a#0) AS try_subtract(a, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_sum.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_sum.explain new file mode 100644 index 0000000000000..43d790d43b10f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_sum.explain @@ -0,0 +1,2 @@ +Aggregate [try_sum(a#0) AS try_sum(a)#0L] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary.explain new file mode 100644 index 0000000000000..ed7ed2348e277 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary.explain @@ -0,0 +1,2 @@ +Project [tryeval(null) AS try_to_binary(g, format)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary_without_format.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary_without_format.explain new file mode 100644 index 0000000000000..b06903b8e24f7 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_binary_without_format.explain @@ -0,0 +1,2 @@ +Project [tryeval(unhex(g#0, true)) AS try_to_binary(g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_number.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_number.explain new file mode 100644 index 0000000000000..aabb9f60c478a --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_number.explain @@ -0,0 +1,2 @@ +Project [try_to_number(g#0, 99,999) AS try_to_number(g, 99,999)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain new file mode 100644 index 0000000000000..8074beab7db81 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain @@ -0,0 +1,2 @@ +Project [gettimestamp(g#0, g#0, TimestampType, Some(America/Los_Angeles), false) AS try_to_timestamp(g, g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp_without_format.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp_without_format.explain new file mode 100644 index 0000000000000..b1a432552174b --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp_without_format.explain @@ -0,0 +1,2 @@ +Project [cast(g#0 as timestamp) AS try_to_timestamp(g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_add.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_add.json new file mode 100644 index 0000000000000..80300b5b5778a --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_add.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_add", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1cb613b3943f1c4a81ebf75f13657e7ad8018db GIT binary patch literal 183 zcmd;L5@3|tz{oX;k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1I#iaW3|fqRRNh PloTOWA!aVdL?{ISD3m$7 literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.json new file mode 100644 index 0000000000000..1216f4b5c635f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.json @@ -0,0 +1,25 
@@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_avg", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ab7a5d19e380eab9aa93bb336242215be4cbc7d GIT binary patch literal 176 zcmd;L5@3{C$;dT{k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1HS40WS8EqRRNh NvUDL)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1H3{DK4&(qRRM` S%(BdsR3TO&W-i7=C1k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1GSzIWE4EqRRNx XoYdUZyps6D5+PP0W-i852$cu`@v1z~ literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json new file mode 100644 index 0000000000000..c4e5bc2f415ee --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_element_at", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "f" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f6c54f2fa5ec56e784fb125c91066dbb20a1686 GIT binary patch literal 190 zcmd;L5@3|t&d4>1k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1GSzIWE4EqRRNx XoYdUZyps6D5+PP0W-i7w2$c>1@whzK literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json new file mode 100644 index 0000000000000..df22654c82031 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + 
"unresolvedFunction": { + "functionName": "try_multiply", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..8912423235e0b2e8644e2bf856d34bdac8a764e2 GIT binary patch literal 188 zcmd;L5@3|t%E&c|k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1IFS87`iZqRRN( U(wvgaf}BbrRv~6C#zZIu0C4g=CIA2c literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json new file mode 100644 index 0000000000000..f3a5df24cce88 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_subtract", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0cb5f502787389fe36a304417a66b453635eda1 GIT binary patch literal 188 zcmd;L5@3|t%E&c|k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1IFS87`iZqRRN< U(xj52#N-koRv~6C#zZIu0B?3Y4gdfE literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.json new file mode 100644 index 0000000000000..41e93d1fcf956 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_sum", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..dce7d9df359c9528abd83ce886fae5d902d4365f GIT binary patch literal 176 
zcmd;L5@3{C$;dT{k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1HS40WS8EqRRN< N(p(`{A!aVdL;#ZNIXVCU literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json new file mode 100644 index 0000000000000..9b57b6b26b562 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_to_binary", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "format" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..28b7059160757aa9bbe2b5548bdcea3f31026c12 GIT binary patch literal 194 zcmd;L5@3|t&B!&0k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1G?@B`)5QqRRM^ f{P?8Iyu_kPAyy$~F2-~rE-sENwzT}B+{6+9?bbgP literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json new file mode 100644 index 0000000000000..2498ff9a7872f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_to_binary", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..682eb1821a3a1ba535b5073052e7753e76da618f GIT binary patch literal 182 zcmd;L5@3{C&&V~2k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1Ge%F)rScqRRM^ T{P?8Iyu_kPAyy$~F2-~K1;jhb literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json new file mode 100644 index 0000000000000..44e894743dfc8 --- /dev/null +++ 
b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_to_number", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": "99,999" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2eba8a19d5df1b543508114eec27b2a4e70c16b GIT binary patch literal 194 zcmd;L5@3|t&B!&0k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1G?@B`)5QqRRM^ f{P?`m+@#bZAyy$~F2-~rE-sENHcLw#OG`@t>M1?h literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json new file mode 100644 index 0000000000000..d00967823a33c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_to_timestamp", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f0300d48a6fc2841a907a13835e4038ff20dce4 GIT binary patch literal 192 zcmd;L5@3|t$;dT{k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1He81ulV-qRRM^ Y{P>d0+|=Td#M}ZQRv~6C#&jqJ0DCJv*Z=?k literal 0 HcmV?d00001 diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json new file mode 100644 index 0000000000000..4fdfc38ca539b --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + 
} + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_to_timestamp", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin new file mode 100644 index 0000000000000000000000000000000000000000..91a4156e305f6f601ac9a328b0e3fe28cc034ec8 GIT binary patch literal 185 zcmd;L5@3|t#K<*?k&8)yA*!2EsDrV%q^LBx#3nPvDk(EPGp|G^(F#N+S*7HcCgr5+ zq*xJ9VW*R7l~`1iSZM>)XQz{9m77>#1Jsk5m##xdtDR0d$atVqJ1J=?NiKnsqRRM^ W{P>d0+|=Td#M}ZQRv~6C#&iJ2h&?3$ literal 0 HcmV?d00001 diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index 66b2ad149dc2a..1ee1435bdb244 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -104,6 +104,14 @@ Math Functions tan tanh toDegrees + try_add + try_avg + try_divide + try_multiply + try_subtract + try_sum + try_to_binary + try_to_number degrees toRadians radians @@ -159,6 +167,7 @@ Datetime Functions window session_window timestamp_seconds + try_to_timestamp unix_date unix_micros unix_millis @@ -234,6 +243,7 @@ Collection Functions schema_of_csv str_to_map to_csv + try_element_at Partition Transformation Functions diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py index 84a44baccdcb4..573ca0fcddf7b 100644 --- a/python/pyspark/sql/connect/functions.py +++ b/python/pyspark/sql/connect/functions.py @@ -843,6 +843,48 @@ def sqrt(col: "ColumnOrName") -> Column: sqrt.__doc__ = pysparkfuncs.sqrt.__doc__ +def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_add", left, right) + + +try_add.__doc__ = pysparkfuncs.try_add.__doc__ + + +def try_avg(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_avg", col) + + +try_avg.__doc__ = pysparkfuncs.try_avg.__doc__ + + +def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_divide", left, right) + + +try_divide.__doc__ = pysparkfuncs.try_divide.__doc__ + + +def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_multiply", left, right) + + +try_multiply.__doc__ = pysparkfuncs.try_multiply.__doc__ + + +def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_subtract", left, right) + + +try_subtract.__doc__ = pysparkfuncs.try_subtract.__doc__ + + +def try_sum(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_sum", col) + + +try_sum.__doc__ = pysparkfuncs.try_sum.__doc__ + + def tan(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("tan", col) @@ -1638,6 +1680,13 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: element_at.__doc__ = pysparkfuncs.element_at.__doc__ +def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_element_at", col, extraction) + + +try_element_at.__doc__ = pysparkfuncs.try_element_at.__doc__ + + def exists(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column: return _invoke_higher_order_function("exists", [col], [f]) 
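The Connect Python wrappers above forward to the same unresolved functions, so `try_element_at` behaves identically in both languages: 1-based indexing on arrays, negative indices counting from the end, and null instead of an error for an out-of-range index or a missing map key. A hedged Scala sketch of those semantics (session, column names, and data are illustrative):

import org.apache.spark.sql.functions._
import spark.implicits._ // assumes a SparkSession named `spark`

val arr = Seq(Seq("a", "b", "c")).toDF("data")
arr.select(
  try_element_at(col("data"), lit(1)),  // "a": indices are 1-based
  try_element_at(col("data"), lit(-1)), // "c": negative indices count from the end
  try_element_at(col("data"), lit(10))  // null: index past the end, no exception
).show()

val m = Seq(Map("a" -> 1.0)).toDF("data")
m.select(try_element_at(col("data"), lit("b"))).show() // null: key absent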
@@ -2490,6 +2539,23 @@ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column: startswith.__doc__ = pysparkfuncs.startswith.__doc__ +def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: + if format is not None: + return _invoke_function_over_columns("try_to_binary", col, format) + else: + return _invoke_function_over_columns("try_to_binary", col) + + +try_to_binary.__doc__ = pysparkfuncs.try_to_binary.__doc__ + + +def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_to_number", col, format) + + +try_to_number.__doc__ = pysparkfuncs.try_to_number.__doc__ + + # Date/Timestamp functions # TODO(SPARK-41455): Resolve dtypes inconsistencies for: # to_timestamp, from_utc_timestamp, to_utc_timestamp, @@ -2711,6 +2777,16 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: to_timestamp.__doc__ = pysparkfuncs.to_timestamp.__doc__ +def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: + if format is not None: + return _invoke_function_over_columns("try_to_timestamp", col, format) + else: + return _invoke_function_over_columns("try_to_timestamp", col) + + +try_to_timestamp.__doc__ = pysparkfuncs.try_to_timestamp.__doc__ + + def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column: return _invoke_function_over_columns("xpath", xml, path) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 0d8f69daabb91..a3f9181840f92 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -350,6 +350,159 @@ def sqrt(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sqrt", col) +@try_remote_functions +def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column: + """ + Returns the sum of `left` and `right` and the result is null on overflow. + The acceptable input types are the same with the `+` operator. + + .. versionadded:: 3.5.0 + + Notes + ----- + Only numeric types are supported in this function, while `try_add` in SQL supports Numeric, + DATE, TIMESTAMP, and INTERVAL. + + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + right : :class:`~pyspark.sql.Column` or str + + Examples + -------- + >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"]) + >>> df.select(try_add(df.birth, df.age).alias('r')).collect() + [Row(r=1997), Row(r=1992)] + """ + return _invoke_function_over_columns("try_add", left, right) + + +@try_remote_functions +def try_avg(col: "ColumnOrName") -> Column: + """ + Returns the mean calculated from values of a group and the result is null on overflow. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + + Examples + -------- + >>> df = spark.createDataFrame([(1982, 15), (1990, 2)], ["birth", "age"]) + >>> df.select(try_avg(df.age).alias('r')).collect() + [Row(r=8.5)] + """ + return _invoke_function_over_columns("try_avg", col) + + +@try_remote_functions +def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: + """ + Returns `dividend`/`divisor`. It always performs floating point division. Its result is + always null if `divisor` is 0. + + .. versionadded:: 3.5.0 + + Notes + ----- + In this function both `dividend` and `divisor` must be numeric, while in the SQL function + `try_divide` the `dividend` can also be an interval.
+ + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + dividend + right : :class:`~pyspark.sql.Column` or str + divisor + + Examples + -------- + >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"]) + >>> df.select(try_divide(df.a, df.b).alias('r')).collect() + [Row(r=400.0), Row(r=995.0)] + """ + return _invoke_function_over_columns("try_divide", left, right) + + +@try_remote_functions +def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: + """ + Returns `left`*`right` and the result is null on overflow. The acceptable input types are the + same with the `*` operator. + + .. versionadded:: 3.5.0 + + Notes + ----- + Only numeric types are supported in this function, while `try_multiply` in SQL supports + Numeric and INTERVAL. + + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + multiplicand + right : :class:`~pyspark.sql.Column` or str + multiplier + + Examples + -------- + >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"]) + >>> df.select(try_multiply(df.a, df.b).alias('r')).collect() + [Row(r=90000), Row(r=3980)] + """ + return _invoke_function_over_columns("try_multiply", left, right) + + +@try_remote_functions +def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column: + """ + Returns `left`-`right` and the result is null on overflow. The acceptable input types are the + same with the `-` operator. + + .. versionadded:: 3.5.0 + + Notes + ----- + Only numeric types are supported in this function, while `try_subtract` in SQL supports + Numeric, DATE, TIMESTAMP, and INTERVAL. + + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + right : :class:`~pyspark.sql.Column` or str + + Examples + -------- + >>> df = spark.createDataFrame([(6000, 15), (1990, 2)], ["a", "b"]) + >>> df.select(try_subtract(df.a, df.b).alias('r')).collect() + [Row(r=5985), Row(r=1988)] + """ + return _invoke_function_over_columns("try_subtract", left, right) + + +@try_remote_functions +def try_sum(col: "ColumnOrName") -> Column: + """ + Returns the sum calculated from values of a group and the result is null on overflow. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + + Examples + -------- + >>> df = spark.range(10) + >>> df.select(try_sum(df["id"]).alias('r')).collect() + [Row(r=45)] + """ + return _invoke_function_over_columns("try_sum", col) + + @try_remote_functions def abs(col: "ColumnOrName") -> Column: """ @@ -6331,6 +6484,36 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: return _invoke_function("to_timestamp", _to_java_column(col), format) +def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: + """ + Parses the `col` expression with the `format` to a timestamp. The function always + returns null on an invalid input with/without ANSI SQL mode enabled. The result data type is + consistent with the value of configuration `spark.sql.timestampType`. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + column values to convert. + format : :class:`~pyspark.sql.Column` or str, optional + format to use to convert timestamp values.
+ + Examples + -------- + >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) + >>> df.select(try_to_timestamp(df.t).alias('dt')).collect() + [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] + + >>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect() + [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] + """ + if format is not None: + return _invoke_function_over_columns("try_to_timestamp", col, format) + else: + return _invoke_function_over_columns("try_to_timestamp", col) + + @try_remote_functions def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column: """ @@ -9467,6 +9650,61 @@ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column: return _invoke_function_over_columns("startswith", str, prefix) +def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column: + """ + This is a special version of `to_binary` that performs the same operation, but returns a NULL + value instead of raising an error if the conversion cannot be performed. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + Input column or strings. + format : :class:`~pyspark.sql.Column` or str, optional + format to use to convert binary values. + + Examples + -------- + >>> df = spark.createDataFrame([("abc",)], ["e"]) + >>> df.select(try_to_binary(df.e, lit("utf-8")).alias('r')).collect() + [Row(r=bytearray(b'abc'))] + + >>> df = spark.createDataFrame([("414243",)], ["e"]) + >>> df.select(try_to_binary(df.e).alias('r')).collect() + [Row(r=bytearray(b'ABC'))] + """ + if format is not None: + return _invoke_function_over_columns("try_to_binary", col, format) + else: + return _invoke_function_over_columns("try_to_binary", col) + + +@try_remote_functions +def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column: + """ + Converts string `col` to a number based on the string format `format`. Returns NULL if the + string `col` does not match the expected format. The format follows the same semantics as the + `to_number` function. + + .. versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + Input column or strings. + format : :class:`~pyspark.sql.Column` or str + format to use to convert number values. + + Examples + -------- + >>> df = spark.createDataFrame([("$78.12",)], ["e"]) + >>> df.select(try_to_number(df.e, lit("$99.99")).alias('r')).collect() + [Row(r=Decimal('78.12'))] + """ + return _invoke_function_over_columns("try_to_number", col, format) + + # ---------------------- Collection functions ------------------------------ @@ -9870,6 +10108,40 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: return _invoke_function_over_columns("element_at", col, lit(extraction)) +@try_remote_functions +def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: + """ + (array, index) - Returns element of array at given (1-based) index. If index is 0, Spark will + throw an error. If index < 0, accesses elements from the last to the first. The function + always returns NULL if the index exceeds the length of the array. + + (map, key) - Returns value for given key. The function always returns NULL if the key is not + contained in the map. + + ..
versionadded:: 3.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + name of column containing array or map + extraction : + index to check for in array or key to check for in map + + Examples + -------- + >>> df = spark.createDataFrame([(["a", "b", "c"],)], ['data']) + >>> df.select(try_element_at(df.data, lit(1)).alias('r')).collect() + [Row(r='a')] + >>> df.select(try_element_at(df.data, lit(-1)).alias('r')).collect() + [Row(r='c')] + + >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0},)], ['data']) + >>> df.select(try_element_at(df.data, lit("a")).alias('r')).collect() + [Row(r=1.0)] + """ + return _invoke_function_over_columns("try_element_at", col, extraction) + + @try_remote_functions def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column: """ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 81a57368a8d53..fa1c7ad371133 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1872,6 +1872,87 @@ object functions { */ def sqrt(colName: String): Column = sqrt(Column(colName)) + /** + * Returns the sum of `left` and `right` and the result is null on overflow. The acceptable + * input types are the same with the `+` operator. + * + * @note + * Only numeric types are supported in this function, while `try_add` in SQL supports Numeric, + * DATE, TIMESTAMP, and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_add(left: Column, right: Column): Column = withExpr { + Add(left.expr, right.expr, EvalMode.TRY) + } + + /** + * Returns the mean calculated from values of a group and the result is null on overflow. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_avg(e: Column): Column = withAggregateFunction { + Average(e.expr, EvalMode.TRY) + } + + /** + * Returns `dividend`/`divisor`. It always performs floating point division. Its result is + * always null if `divisor` is 0. + * + * @note + * In this function both `dividend` and `divisor` must be numeric, while in the SQL function + * `try_divide` the `dividend` can also be an interval. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_divide(dividend: Column, divisor: Column): Column = withExpr { + Divide(dividend.expr, divisor.expr, EvalMode.TRY) + } + + /** + * Returns `left`*`right` and the result is null on overflow. The acceptable input types are the + * same with the `*` operator. + * + * @note + * Only numeric types are supported in this function, while `try_multiply` in SQL supports + * Numeric and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_multiply(left: Column, right: Column): Column = withExpr { + Multiply(left.expr, right.expr, EvalMode.TRY) + } + + /** + * Returns `left`-`right` and the result is null on overflow. The acceptable input types are the + * same with the `-` operator. + * + * @note + * Only numeric types are supported in this function, while `try_subtract` in SQL supports + * Numeric, DATE, TIMESTAMP, and INTERVAL. + * + * @group math_funcs + * @since 3.5.0 + */ + def try_subtract(left: Column, right: Column): Column = withExpr { + Subtract(left.expr, right.expr, EvalMode.TRY) + } + + /** + * Returns the sum calculated from values of a group and the result is null on overflow.
+ * + * @group math_funcs + * @since 3.5.0 + */ + def try_sum(e: Column): Column = withAggregateFunction { + Sum(e.expr, EvalMode.TRY) + } + /** * Creates a new struct column. * If the input column is a column in a `DataFrame`, or a derived column expression * that is named (i.e. aliased), its name would be retained as the @@ -4075,6 +4156,40 @@ object functions { StartsWith(str.expr, prefix.expr) } + /** + * This is a special version of `to_binary` that performs the same operation, but returns a NULL + * value instead of raising an error if the conversion cannot be performed. + * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_binary(e: Column, format: Column): Column = withExpr { + new TryToBinary(e.expr, format.expr) + } + + /** + * This is a special version of `to_binary` that performs the same operation, but returns a NULL + * value instead of raising an error if the conversion cannot be performed. + * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_binary(e: Column): Column = withExpr { + new TryToBinary(e.expr) + } + + /** + * Converts string `e` to a number based on the string format `format`. Returns NULL if the + * string `e` does not match the expected format. The format follows the same semantics as the + * `to_number` function. + * + * @group string_funcs + * @since 3.5.0 + */ + def try_to_number(e: Column, format: Column): Column = withExpr { + TryToNumber(e.expr, format.expr) + } + ////////////////////////////////////////////////////////////////////////////////////////////// // DateTime functions ////////////////////////////////////////////////////////////////////////////////////////////// @@ -4534,6 +4649,30 @@ object functions { new ParseToTimestamp(s.expr, Literal(fmt)) } + /** + * Parses the `s` expression with the `format` to a timestamp. The function always returns null + * on an invalid input with/without ANSI SQL mode enabled. The result data type is consistent + * with the value of configuration `spark.sql.timestampType`. + * + * @group datetime_funcs + * @since 3.5.0 + */ + def try_to_timestamp(s: Column, format: Column): Column = withExpr { + new ParseToTimestamp(s.expr, format.expr) + } + + /** + * Parses the `s` expression to a timestamp. The function always returns null on an invalid + * input with/without ANSI SQL mode enabled. It follows casting rules to a timestamp. The result + * data type is consistent with the value of configuration `spark.sql.timestampType`. + * + * @group datetime_funcs + * @since 3.5.0 + */ + def try_to_timestamp(s: Column): Column = withExpr { + new ParseToTimestamp(s.expr) + } + /** * Converts the column into `DateType` by casting rules to `DateType`. * @@ -5125,6 +5264,21 @@ object functions { ElementAt(column.expr, lit(value).expr) } + /** + * (array, index) - Returns element of array at given (1-based) index. If index is 0, Spark will + * throw an error. If index < 0, accesses elements from the last to the first. The function + * always returns NULL if the index exceeds the length of the array. + * + * (map, key) - Returns value for given key. The function always returns NULL if the key is not + * contained in the map. + * + * @group map_funcs + * @since 3.5.0 + */ + def try_element_at(column: Column, value: Column): Column = withExpr { + new TryElementAt(column.expr, value.expr) + } + /** * Returns element of array at given (0-based) index. If the index points * outside of the array boundaries, then this function returns NULL.
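Unlike the Connect client, the sql/core definitions above construct the Catalyst expressions directly with `EvalMode.TRY`, so operators that would fail a query under ANSI mode instead evaluate to null. A small sketch of the contrast, assuming a `SparkSession` named `spark` (the configuration toggle is illustrative):

import org.apache.spark.sql.functions._

spark.conf.set("spark.sql.ansi.enabled", "true")
// Under ANSI mode, lit(Int.MaxValue) + lit(1) would fail the query with an
// arithmetic overflow error; the TRY-mode variant returns null instead:
spark.range(1).select(try_add(lit(Int.MaxValue), lit(1)).as("r")).show() // r is null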
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 94e1ee2366a6b..1d4adcbe4a000 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -1205,4 +1205,15 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val result6 = df.select(make_ym_interval()) checkAnswer(result5, result6) } + + test("try_to_timestamp") { + val df = Seq(("2016-12-31", "yyyy-MM-dd")).toDF("a", "b") + val ts = Timestamp.valueOf("2016-12-31 00:00:00") + + checkAnswer(df.selectExpr("try_to_timestamp(a, b)"), Seq(Row(ts))) + checkAnswer(df.select(try_to_timestamp(col("a"), col("b"))), Seq(Row(ts))) + + checkAnswer(df.selectExpr("try_to_timestamp(a)"), Seq(Row(ts))) + checkAnswer(df.select(try_to_timestamp(col("a"))), Seq(Row(ts))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index fde55e27bf380..484aa2acece42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -647,4 +647,54 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { df1.select(width_bucket(col("v"), col("min"), col("max"), col("n"))) ) } + + test("try_add") { + val df = Seq((1982, 15)).toDF("birth", "age") + + checkAnswer(df.selectExpr("try_add(birth, age)"), Seq(Row(1997))) + checkAnswer(df.select(try_add(col("birth"), col("age"))), Seq(Row(1997))) + + checkAnswer(df.select(try_add(lit(2147483647), lit(2))), Seq(Row(1997))) + } + + test("try_avg") { + val df = Seq((1982, 15), (1990, 11)).toDF("birth", "age") + + checkAnswer(df.selectExpr("try_avg(age)"), Seq(Row(13))) + checkAnswer(df.select(try_avg(col("age"))), Seq(Row(13))) + } + + test("try_divide") { + val df = Seq((2000, 10), (2050, 5)).toDF("birth", "age") + + checkAnswer(df.selectExpr("try_divide(birth, age)"), Seq(Row(200.0), Row(410.0))) + checkAnswer(df.select(try_divide(col("birth"), col("age"))), Seq(Row(200.0), Row(410.0))) + } + + test("try_element_at") { + val df = Seq((Array(1, 2, 3), 2)).toDF("a", "b") + checkAnswer(df.selectExpr("try_element_at(a, b)"), Seq(Row(2))) + checkAnswer(df.select(try_element_at(col("a"), col("b"))), Seq(Row(2))) + } + + test("try_multiply") { + val df = Seq((2, 3)).toDF("a", "b") + + checkAnswer(df.selectExpr("try_multiply(a, b)"), Seq(Row(6))) + checkAnswer(df.select(try_multiply(col("a"), col("b"))), Seq(Row(6))) + } + + test("try_subtract") { + val df = Seq((2, 3)).toDF("a", "b") + + checkAnswer(df.selectExpr("try_subtract(a, b)"), Seq(Row(-1))) + checkAnswer(df.select(try_subtract(col("a"), col("b"))), Seq(Row(-1))) + } + + test("try_sum") { + val df = Seq((2, 3), (5, 6)).toDF("a", "b") + + checkAnswer(df.selectExpr("try_sum(a)"), Seq(Row(7))) + checkAnswer(df.select(try_sum(col("a"))), Seq(Row(7))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 21ab4899a75cf..c7bdff6939d3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -1003,4 +1003,21 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("startswith(a, 
b)"), Row(true)) checkAnswer(df.select(startswith(col("a"), col("b"))), Row(true)) } + + test("try_to_binary") { + val df = Seq("abc").toDF("a") + + checkAnswer(df.selectExpr("try_to_binary(a, 'utf-8')"), + df.select(try_to_binary(col("a"), lit("utf-8")))) + + checkAnswer(df.selectExpr("try_to_binary(a)"), + df.select(try_to_binary(col("a")))) + } + + test("try_to_number") { + val df = Seq("$78.12").toDF("a") + + checkAnswer(df.selectExpr("try_to_number(a, '$99.99')"), Seq(Row(78.12))) + checkAnswer(df.select(try_to_number(col("a"), lit("$99.99"))), Seq(Row(78.12))) + } } From 416e8f7224081afade28523430f556c94cb2cd1b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 19 Jun 2023 11:51:07 +0800 Subject: [PATCH 2/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- .../test/scala/org/apache/spark/sql/MathFunctionsSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 484aa2acece42..2fed4b3fdc794 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -653,8 +653,6 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("try_add(birth, age)"), Seq(Row(1997))) checkAnswer(df.select(try_add(col("birth"), col("age"))), Seq(Row(1997))) - - checkAnswer(df.select(try_add(lit(2147483647), lit(2))), Seq(Row(1997))) } test("try_avg") { From 12371998cbd5a6426575d0c674c1cf50f5555258 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 19 Jun 2023 16:02:56 +0800 Subject: [PATCH 3/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- .../jvm/src/main/scala/org/apache/spark/sql/functions.scala | 4 ++-- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index fa8346590832b..d5697227e1a7b 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -1829,7 +1829,7 @@ object functions { def try_avg(e: Column): Column = Column.fn("try_avg", e) /** - * Returns `dividend`/`divisor`. It always performs floating point division. Its result is + * Returns `dividend``/``divisor`. It always performs floating point division. Its result is * always null if `divisor` is 0. * * @note @@ -1843,7 +1843,7 @@ object functions { def try_divide(left: Column, right: Column): Column = Column.fn("try_divide", left, right) /** - * Returns `left`*`right` and the result is null on overflow. The acceptable input types are the + * Returns `left``*``right` and the result is null on overflow. The acceptable input types are the * same with the `*` operator. * * @note diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index fa1c7ad371133..388c3a56e71a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1898,7 +1898,7 @@ object functions { } /** - * Returns `dividend`/`divisor`. It always performs floating point division. 
Its result is + * Returns `dividend``/``divisor`. It always performs floating point division. Its result is * always null if `divisor` is 0. * * @note @@ -1914,7 +1914,7 @@ object functions { } /** - * Returns `left`*`right` and the result is null on overflow. The acceptable input types are the + * Returns `left``*``right` and the result is null on overflow. The acceptable input types are the * same with the `*` operator. * * @note From 6be41ee4f95daab044fbf3c5fe7dc9da0ecf5f6b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 20 Jun 2023 16:34:37 +0800 Subject: [PATCH 4/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- .../main/scala/org/apache/spark/sql/functions.scala | 10 +++++----- .../main/scala/org/apache/spark/sql/functions.scala | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index aa316621de613..607782f68d7d6 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -1843,8 +1843,8 @@ object functions { def try_multiply(left: Column, right: Column): Column = Column.fn("try_multiply", left, right) /** - * Returns `left`-`right` and the result is null on overflow. The acceptable input types are the - * same with the `-` operator. + * Returns `left``-``right` and the result is null on overflow. The acceptable input types are + * the same with the `-` operator. * * @group math_funcs * @since 3.5.0 @@ -4067,8 +4067,8 @@ object functions { def try_to_binary(e: Column): Column = Column.fn("try_to_binary", e) /** - * Convert string 'e' to a number based on the string format `format`. Returns NULL if the - * string 'e' does not match the expected format. The format follows the same semantics as the + * Convert string `e` to a number based on the string format `format`. Returns NULL if the + * string `e` does not match the expected format. The format follows the same semantics as the * to_number function. * * @group string_funcs @@ -5352,7 +5352,7 @@ object functions { /** * (array, index) - Returns element of array at given (1-based) index. If Index is 0, Spark will - * throw an error. If index < 0, accesses elements from the last to the first. The function + * throw an error. If index < 0, accesses elements from the last to the first. The function * always returns NULL if the index exceeds the length of the array. * * (map, key) - Returns value for given key. The function always returns NULL if the key is not diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index f09fcf5429838..1f8eb1746ee24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1916,7 +1916,7 @@ object functions { } /** - * Returns `left`-`right` and the result is null on overflow. The acceptable input types are + * Returns `left``-``right` and the result is null on overflow. The acceptable input types are * the same with the `-` operator. * * @group math_funcs @@ -4193,8 +4193,8 @@ object functions { } /** - * Convert string 'e' to a number based on the string format `format`. Returns NULL if the - * string 'e' does not match the expected format. 
The format follows the same semantics as the + * Convert string `e` to a number based on the string format `format`. Returns NULL if the + * string `e` does not match the expected format. The format follows the same semantics as the * to_number function. * * @group string_funcs @@ -5501,7 +5501,7 @@ object functions { /** * (array, index) - Returns element of array at given (1-based) index. If Index is 0, Spark will - * throw an error. If index < 0, accesses elements from the last to the first. The function + * throw an error. If index < 0, accesses elements from the last to the first. The function * always returns NULL if the index exceeds the length of the array. * * (map, key) - Returns value for given key. The function always returns NULL if the key is not From 31a79c11a7d7a5a48ccc01603981f91fae3f5585 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 20 Jun 2023 17:15:53 +0800 Subject: [PATCH 5/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- .../scala/org/apache/spark/sql/functions.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 1f8eb1746ee24..966cfd7abc47b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1879,8 +1879,8 @@ object functions { * @group math_funcs * @since 3.5.0 */ - def try_add(left: Column, right: Column): Column = { - call_udf("try_add", left, right) + def try_add(left: Column, right: Column): Column = withExpr { + UnresolvedFunction("try_add", Seq(left.expr, right.expr), isDistinct = false) } /** @@ -1900,8 +1900,8 @@ object functions { * @group math_funcs * @since 3.5.0 */ - def try_divide(dividend: Column, divisor: Column): Column = { - call_udf("try_divide", dividend, divisor) + def try_divide(dividend: Column, divisor: Column): Column = withExpr { + UnresolvedFunction("try_divide", Seq(dividend.expr, divisor.expr), isDistinct = false) } /** @@ -1911,8 +1911,8 @@ object functions { * @group math_funcs * @since 3.5.0 */ - def try_multiply(left: Column, right: Column): Column = { - call_udf("try_multiply", left, right) + def try_multiply(left: Column, right: Column): Column = withExpr { + UnresolvedFunction("try_multiply", Seq(left.expr, right.expr), isDistinct = false) } /** @@ -1922,8 +1922,8 @@ object functions { * @group math_funcs * @since 3.5.0 */ - def try_subtract(left: Column, right: Column): Column = { - call_udf("try_subtract", left, right) + def try_subtract(left: Column, right: Column): Column = withExpr { + UnresolvedFunction("try_subtract", Seq(left.expr, right.expr), isDistinct = false) } /** From afefe85e057d88e5c0cd81c4e55994d70e1c8fd8 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 20 Jun 2023 19:09:04 +0800 Subject: [PATCH 6/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- python/pyspark/sql/functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index b0652ad909b47..e26bddd869ad2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -521,6 +521,7 @@ def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column: >>> df.select(try_subtract(df.a, df.b).alias('r')).collect() [Row(r=5985), Row(r=1988)] + >>> from pyspark.sql.types import StructType, StructField, IntegerType, StringType >>> schema = StructType([ 
... StructField("i", IntegerType(), True), ... StructField("d", StringType(), True), From bdb35acb2b030db5949d492d7ac93e544eb88891 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 20 Jun 2023 22:18:53 +0800 Subject: [PATCH 7/7] [SPARK-43939][CONNECT][PYTHON] Add try_* functions to Scala and Python --- python/pyspark/sql/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e26bddd869ad2..a1f46959e265c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -369,6 +369,7 @@ def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column: >>> df.select(try_add(df.birth, df.age).alias('r')).collect() [Row(r=1997), Row(r=1992)] + >>> from pyspark.sql.types import StructType, StructField, IntegerType, StringType >>> schema = StructType([ ... StructField("i", IntegerType(), True), ... StructField("d", StringType(), True), @@ -441,7 +442,7 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: >>> df = spark.createDataFrame([(1, 2)], ["year", "month"]) >>> df.select( - ... ttry_divide(make_interval(df.year), df.month).alias('r') + ... try_divide(make_interval(df.year), df.month).alias('r') ... ).show(truncate=False) +--------+ |r |