Skip to content

Commit

Permalink
[SPARK-47005][PYTHON][DOCS] Refine docstring of `asc_nulls_first/asc_…
Browse files Browse the repository at this point in the history
…nulls_last/desc_nulls_first/desc_nulls_last`

### What changes were proposed in this pull request?
This PR refines the docstrings of `asc_nulls_first/asc_nulls_last/desc_nulls_first/desc_nulls_last` and adds some new examples.

### Why are the changes needed?
To improve the PySpark documentation.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Pass GitHub Actions.

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #45066 from LuciferYang/sort-funcs.

Authored-by: yangjie01 <yangjie01@baidu.com>
Signed-off-by: yangjie01 <yangjie01@baidu.com>
  • Loading branch information
LuciferYang committed Feb 8, 2024
1 parent abf8770 commit 45956f7
Showing 1 changed file with 128 additions and 20 deletions.
148 changes: 128 additions & 20 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2889,7 +2889,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
@_try_remote_functions
def asc_nulls_first(col: "ColumnOrName") -> Column:
"""
Returns a sort expression based on the ascending order of the given
Sort Function: Returns a sort expression based on the ascending order of the given
column name, and null values return before non-null values.

.. versionadded:: 2.4.0
Expand All @@ -2909,10 +2909,11 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:

Examples
--------
>>> df1 = spark.createDataFrame([(1, "Bob"),
... (0, None),
... (2, "Alice")], ["age", "name"])
>>> df1.sort(asc_nulls_first(df1.name)).show()
Example 1: Sorting a DataFrame with null values in ascending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.asc_nulls_first(df.name)).show()
+---+-----+
|age| name|
+---+-----+
Expand All @@ -2921,6 +2922,32 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:
| 1| Bob|
+---+-----+

Example 2: Sorting a DataFrame with multiple columns, null values in ascending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(
... [(1, "Bob", None), (0, None, "Z"), (2, "Alice", "Y")], ["age", "name", "grade"])
>>> df.sort(sf.asc_nulls_first(df.name), sf.asc_nulls_first(df.grade)).show()
+---+-----+-----+
|age| name|grade|
+---+-----+-----+
| 0| NULL| Z|
| 2|Alice| Y|
| 1| Bob| NULL|
+---+-----+-----+

Example 3: Sorting a DataFrame with null values in ascending order using column name string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.asc_nulls_first("name")).show()
+---+-----+
|age| name|
+---+-----+
| 0| NULL|
| 2|Alice|
| 1| Bob|
+---+-----+
"""
return (
col.asc_nulls_first()
Expand All @@ -2932,7 +2959,7 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:
@_try_remote_functions
def asc_nulls_last(col: "ColumnOrName") -> Column:
"""
Returns a sort expression based on the ascending order of the given
Sort Function: Returns a sort expression based on the ascending order of the given
column name, and null values appear after non-null values.

.. versionadded:: 2.4.0
Expand All @@ -2952,10 +2979,11 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:

Examples
--------
>>> df1 = spark.createDataFrame([(0, None),
... (1, "Bob"),
... (2, "Alice")], ["age", "name"])
>>> df1.sort(asc_nulls_last(df1.name)).show()
Example 1: Sorting a DataFrame with null values in ascending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.asc_nulls_last(df.name)).show()
+---+-----+
|age| name|
+---+-----+
Expand All @@ -2964,6 +2992,32 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
| 0| NULL|
+---+-----+

Example 2: Sorting a DataFrame with multiple columns, null values in ascending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(
... [(0, None, "Z"), (1, "Bob", None), (2, "Alice", "Y")], ["age", "name", "grade"])
>>> df.sort(sf.asc_nulls_last(df.name), sf.asc_nulls_last(df.grade)).show()
+---+-----+-----+
|age| name|grade|
+---+-----+-----+
| 2|Alice| Y|
| 1| Bob| NULL|
| 0| NULL| Z|
+---+-----+-----+

Example 3: Sorting a DataFrame with null values in ascending order using column name string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.asc_nulls_last("name")).show()
+---+-----+
|age| name|
+---+-----+
| 2|Alice|
| 1| Bob|
| 0| NULL|
+---+-----+
"""
return (
col.asc_nulls_last() if isinstance(col, Column) else _invoke_function("asc_nulls_last", col)
Expand All @@ -2973,7 +3027,7 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
@_try_remote_functions
def desc_nulls_first(col: "ColumnOrName") -> Column:
"""
Returns a sort expression based on the descending order of the given
Sort Function: Returns a sort expression based on the descending order of the given
column name, and null values appear before non-null values.

.. versionadded:: 2.4.0
Expand All @@ -2993,10 +3047,11 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:

Examples
--------
>>> df1 = spark.createDataFrame([(0, None),
... (1, "Bob"),
... (2, "Alice")], ["age", "name"])
>>> df1.sort(desc_nulls_first(df1.name)).show()
Example 1: Sorting a DataFrame with null values in descending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.desc_nulls_first(df.name)).show()
+---+-----+
|age| name|
+---+-----+
Expand All @@ -3005,6 +3060,32 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:
| 2|Alice|
+---+-----+

Example 2: Sorting a DataFrame with multiple columns, null values in descending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(
... [(1, "Bob", None), (0, None, "Z"), (2, "Alice", "Y")], ["age", "name", "grade"])
>>> df.sort(sf.desc_nulls_first(df.name), sf.desc_nulls_first(df.grade)).show()
+---+-----+-----+
|age| name|grade|
+---+-----+-----+
| 0| NULL| Z|
| 1| Bob| NULL|
| 2|Alice| Y|
+---+-----+-----+

Example 3: Sorting a DataFrame with null values in descending order using column name string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.desc_nulls_first("name")).show()
+---+-----+
|age| name|
+---+-----+
| 0| NULL|
| 1| Bob|
| 2|Alice|
+---+-----+
"""
return (
col.desc_nulls_first()
Expand All @@ -3016,7 +3097,7 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:
@_try_remote_functions
def desc_nulls_last(col: "ColumnOrName") -> Column:
"""
Returns a sort expression based on the descending order of the given
Sort Function: Returns a sort expression based on the descending order of the given
column name, and null values appear after non-null values.

.. versionadded:: 2.4.0
Expand All @@ -3036,10 +3117,11 @@ def desc_nulls_last(col: "ColumnOrName") -> Column:

Examples
--------
>>> df1 = spark.createDataFrame([(0, None),
... (1, "Bob"),
... (2, "Alice")], ["age", "name"])
>>> df1.sort(desc_nulls_last(df1.name)).show()
Example 1: Sorting a DataFrame with null values in descending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.desc_nulls_last(df.name)).show()
+---+-----+
|age| name|
+---+-----+
Expand All @@ -3048,6 +3130,32 @@ def desc_nulls_last(col: "ColumnOrName") -> Column:
| 0| NULL|
+---+-----+

Example 2: Sorting a DataFrame with multiple columns, null values in descending order

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame(
... [(0, None, "Z"), (1, "Bob", None), (2, "Alice", "Y")], ["age", "name", "grade"])
>>> df.sort(sf.desc_nulls_last(df.name), sf.desc_nulls_last(df.grade)).show()
+---+-----+-----+
|age| name|grade|
+---+-----+-----+
| 1| Bob| NULL|
| 2|Alice| Y|
| 0| NULL| Z|
+---+-----+-----+

Example 3: Sorting a DataFrame with null values in descending order using column name string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")], ["age", "name"])
>>> df.sort(sf.desc_nulls_last("name")).show()
+---+-----+
|age| name|
+---+-----+
| 1| Bob|
| 2|Alice|
| 0| NULL|
+---+-----+
"""
return (
col.desc_nulls_last()
Expand Down

0 comments on commit 45956f7

Please sign in to comment.