[SPARK-42896][SQL][PYTHON][FOLLOW-UP] Rename isBarrier to barrier, and correct docstring

### What changes were proposed in this pull request?

This PR is a follow-up of SPARK-42896 that proposes to:
- Add a `versionchanged` directive to the docstring.
- Rename `isBarrier` to `barrier` to follow Python naming conventions.
- Fix some wording and examples.

### Why are the changes needed?

For better documentation, and to make the API more Pythonic.

### Does this PR introduce _any_ user-facing change?

Yes, it renames the parameter, and fixes the documentation.
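
The renamed `barrier` flag enables barrier execution mode, in which all tasks of a stage are launched together rather than scheduled independently. As a rough stdlib-only analogy (not Spark code, and making no assumption about Spark's scheduler), `threading.Barrier` gives the same all-arrive-before-any-proceed guarantee:

```python
import threading

# Hypothetical stand-in for a Spark stage's tasks: with a barrier, no task
# proceeds past the synchronization point until every task has reached it.
N_TASKS = 4
barrier = threading.Barrier(N_TASKS)
started = []
lock = threading.Lock()

def task(i: int) -> None:
    with lock:
        started.append(i)
    barrier.wait()  # blocks until all N_TASKS tasks have arrived

threads = [threading.Thread(target=task, args=(i,)) for i in range(N_TASKS)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(sorted(started))  # [0, 1, 2, 3]
```

Because `barrier.wait()` only returns once every task has called it, a deadlock follows if fewer tasks exist than the barrier expects; Spark's barrier mode has the analogous requirement that the cluster can run all tasks of the stage concurrently.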

### How was this patch tested?

The linters run against this PR should verify the changes.

Closes #40571 from HyukjinKwon/SPARK-42896-followup.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
HyukjinKwon committed Mar 28, 2023
1 parent 2319a31 commit ab4693d
Showing 1 changed file with 26 additions and 12 deletions: python/pyspark/sql/pandas/map_ops.py
@@ -32,7 +32,7 @@ class PandasMapOpsMixin:
     """

     def mapInPandas(
-        self, func: "PandasMapIterFunction", schema: Union[StructType, str], isBarrier: bool = False
+        self, func: "PandasMapIterFunction", schema: Union[StructType, str], barrier: bool = False
     ) -> "DataFrame":
         """
         Maps an iterator of batches in the current :class:`DataFrame` using a Python native
@@ -60,7 +60,11 @@ def mapInPandas(
         schema : :class:`pyspark.sql.types.DataType` or str
             the return type of the `func` in PySpark. The value can be either a
             :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
-        isBarrier : Use barrier mode execution if True.
+        barrier : bool, optional, default False
+            Use barrier mode execution.
+
+            .. versionchanged:: 3.5.0
+                Added ``barrier`` argument.

         Examples
         --------
@@ -75,9 +79,12 @@ def mapInPandas(
         +---+---+
         | 1| 21|
         +---+---+
-        >>> # Set isBarrier=True to force the "mapInPandas" stage running in barrier mode,
-        >>> # it ensures all python UDF workers in the stage will be launched concurrently.
-        >>> df.mapInPandas(filter_func, df.schema, isBarrier=True).show()  # doctest: +SKIP
+
+        Set ``barrier`` to ``True`` to force the ``mapInPandas`` stage to run in
+        barrier mode; this ensures all Python workers in the stage are
+        launched concurrently.
+
+        >>> df.mapInPandas(filter_func, df.schema, barrier=True).show()  # doctest: +SKIP
         +---+---+
         | id|age|
         +---+---+
@@ -102,11 +109,11 @@ def mapInPandas(
             func, returnType=schema, functionType=PythonEvalType.SQL_MAP_PANDAS_ITER_UDF
         )  # type: ignore[call-overload]
         udf_column = udf(*[self[col] for col in self.columns])
-        jdf = self._jdf.mapInPandas(udf_column._jc.expr(), isBarrier)
+        jdf = self._jdf.mapInPandas(udf_column._jc.expr(), barrier)
         return DataFrame(jdf, self.sparkSession)

     def mapInArrow(
-        self, func: "ArrowMapIterFunction", schema: Union[StructType, str], isBarrier: bool = False
+        self, func: "ArrowMapIterFunction", schema: Union[StructType, str], barrier: bool = False
     ) -> "DataFrame":
         """
         Maps an iterator of batches in the current :class:`DataFrame` using a Python native
@@ -131,7 +138,11 @@ def mapInArrow(
         schema : :class:`pyspark.sql.types.DataType` or str
             the return type of the `func` in PySpark. The value can be either a
             :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
-        isBarrier : Use barrier mode execution if True.
+        barrier : bool, optional, default False
+            Use barrier mode execution.
+
+            .. versionchanged:: 3.5.0
+                Added ``barrier`` argument.

         Examples
         --------
@@ -147,9 +158,12 @@ def mapInArrow(
         +---+---+
         | 1| 21|
         +---+---+
-        >>> # Set isBarrier=True to force the "mapInArrow" stage running in barrier mode,
-        >>> # it ensures all python UDF workers in the stage will be launched concurrently.
-        >>> df.mapInArrow(filter_func, df.schema, isBarrier=True).show()  # doctest: +SKIP
+
+        Set ``barrier`` to ``True`` to force the ``mapInArrow`` stage to run in
+        barrier mode; this ensures all Python workers in the stage are
+        launched concurrently.
+
+        >>> df.mapInArrow(filter_func, df.schema, barrier=True).show()  # doctest: +SKIP
         +---+---+
         | id|age|
         +---+---+
@@ -175,7 +189,7 @@ def mapInArrow(
             func, returnType=schema, functionType=PythonEvalType.SQL_MAP_ARROW_ITER_UDF
         )  # type: ignore[call-overload]
        udf_column = udf(*[self[col] for col in self.columns])
-        jdf = self._jdf.pythonMapInArrow(udf_column._jc.expr(), isBarrier)
+        jdf = self._jdf.pythonMapInArrow(udf_column._jc.expr(), barrier)
         return DataFrame(jdf, self.sparkSession)


