# [SPARK-36265][PYTHON] Use __getitem__ instead of getItem to suppress warnings

### What changes were proposed in this pull request?

Use `Column.__getitem__` instead of `Column.getItem` to suppress warnings.

### Why are the changes needed?

In the pandas API on Spark code base, some places call `Column.getItem` with a `Column` object as the key, which triggers a deprecation warning.
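
For reference, a minimal sketch of the two call styles (not taken from this PR; it assumes an active `SparkSession` bound to `spark` and a toy DataFrame). Only the lookup syntax differs; the result is the same:

```py
>>> from pyspark.sql import functions as F
>>> df = spark.createDataFrame([(0,), (1,)], ["code"])
>>> mapping = F.create_map(F.lit(0), F.lit("a"), F.lit(1), F.lit("b"))
>>> df.select(mapping.getItem(df["code"])).show()  # Column as key: emits the FutureWarning
>>> df.select(mapping[df["code"]]).show()          # __getitem__: same result, no warning
```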

### Does this PR introduce _any_ user-facing change?

Yes, users won't see the warnings anymore.

- before

```py
>>> s = ps.Series(list("abbccc"), dtype="category")
>>> s.astype(str)
/path/to/spark/python/pyspark/sql/column.py:322: FutureWarning: A column as 'key' in getItem is deprecated as of Spark 3.0, and will not be supported in the future release. Use `column[key]` or `column.key` syntax instead.
  warnings.warn(
0    a
1    b
2    b
3    c
4    c
5    c
dtype: object
```

- after

```py
>>> s = ps.Series(list("abbccc"), dtype="category")
>>> s.astype(str)
0    a
1    b
2    b
3    c
4    c
5    c
dtype: object
```

### How was this patch tested?

Existing tests.

Closes #33486 from ueshin/issues/SPARK-36265/getitem.

Authored-by: Takuya UESHIN <ueshin@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
ueshin authored and HyukjinKwon committed Jul 23, 2021
1 parent a1a1974 commit a76a087
Showing 4 changed files with 5 additions and 5 deletions.
python/pyspark/pandas/base.py (2 additions & 2 deletions)

```diff
@@ -1580,7 +1580,7 @@ def factorize(
             )
         )
         map_scol = F.create_map(*kvs)
-        scol = map_scol.getItem(self.spark.column)
+        scol = map_scol[self.spark.column]
         codes, uniques = self._with_new_scol(
             scol.alias(self._internal.data_spark_column_names[0])
         ).factorize(na_sentinel=na_sentinel)
@@ -1636,7 +1636,7 @@ def factorize(
         map_scol = F.create_map(*kvs)

         null_scol = F.when(cond, SF.lit(na_sentinel_code))
-        new_scol = null_scol.otherwise(map_scol.getItem(scol))
+        new_scol = null_scol.otherwise(map_scol[scol])

         codes = self._with_new_scol(new_scol.alias(self._internal.data_spark_column_names[0]))

```
python/pyspark/pandas/data_type_ops/base.py (1 addition & 1 deletion)

```diff
@@ -128,7 +128,7 @@ def _as_categorical_type(
     )
     map_scol = F.create_map(*kvs)

-    scol = F.coalesce(map_scol.getItem(index_ops.spark.column), SF.lit(-1))
+    scol = F.coalesce(map_scol[index_ops.spark.column], SF.lit(-1))
     return index_ops._with_new_scol(
         scol.cast(spark_type),
         field=index_ops._internal.data_fields[0].copy(
```
python/pyspark/pandas/data_type_ops/categorical_ops.py (1 addition & 1 deletion)

```diff
@@ -68,7 +68,7 @@ def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> Ind
            *[(SF.lit(code), SF.lit(category)) for code, category in enumerate(categories)]
        )
        map_scol = F.create_map(*kvs)
-       scol = map_scol.getItem(index_ops.spark.column)
+       scol = map_scol[index_ops.spark.column]
        return index_ops._with_new_scol(scol).astype(dtype)

    def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
```
python/pyspark/pandas/frame.py (1 addition & 1 deletion)

```diff
@@ -10854,7 +10854,7 @@ def quantile(spark_column: Column, spark_type: DataType) -> Column:
        for column in percentile_col_names:
            cols_dict[column] = list()
            for i in range(len(qq)):
-               cols_dict[column].append(scol_for(sdf, column).getItem(i).alias(column))
+               cols_dict[column].append(scol_for(sdf, column)[i].alias(column))

        internal_index_column = SPARK_DEFAULT_INDEX_NAME
        cols = []
```
