From 5309c7789839e91703a43795292200edaa003107 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Sat, 11 May 2024 01:48:39 +0800
Subject: [PATCH] Fixes for the latest pandas.

---
 python-package/xgboost/data.py   | 40 ++++++++++++++++++--------------
 tests/python/test_with_pandas.py | 24 +++++++++++--------
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py
index bae96051e90c..28ee57cb768b 100644
--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -370,10 +370,8 @@ def pandas_feature_info(
     if feature_names is None and meta is None:
         if isinstance(data.columns, pd.MultiIndex):
             feature_names = [" ".join([str(x) for x in i]) for i in data.columns]
-        elif isinstance(data.columns, (pd.Index, pd.RangeIndex)):
-            feature_names = list(map(str, data.columns))
         else:
-            feature_names = data.columns.format()
+            feature_names = list(data.columns.map(str))
 
     # handle feature types
     if feature_types is None and meta is None:
@@ -865,6 +863,22 @@ def _is_cudf_df(data: DataType) -> bool:
     return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame")
 
 
+def _get_cudf_cat_predicate() -> Callable[[Any], bool]:
+    try:  # first choice: a dtype class exported at the cudf top level
+        from cudf import CategoricalDtype
+
+        def is_categorical_dtype(dtype: Any) -> bool:
+            return isinstance(dtype, CategoricalDtype)
+
+    except ImportError:  # no such class: import a ready-made predicate
+        try:
+            from cudf.api.types import is_categorical_dtype  # type: ignore
+        except ImportError:  # last resort: the cudf.utils.dtypes location
+            from cudf.utils.dtypes import is_categorical_dtype  # type: ignore
+
+    return is_categorical_dtype  # whichever of the three got bound above
+
+
 def _cudf_array_interfaces(data: DataType, cat_codes: list) -> bytes:
     """Extract CuDF __cuda_array_interface__.  This is special as it returns a new list
     of data and a list of array interfaces.  The data is list of categorical codes that
@@ -872,11 +886,7 @@ def _cudf_array_interfaces(data: DataType, cat_codes: list) -> bytes:
     array interface is finished.
 
     """
-    try:
-        from cudf.api.types import is_categorical_dtype
-    except ImportError:
-        from cudf.utils.dtypes import is_categorical_dtype
-
+    is_categorical_dtype = _get_cudf_cat_predicate()
     interfaces = []
 
     def append(interface: dict) -> None:
@@ -908,12 +918,13 @@ def _transform_cudf_df(
     feature_types: Optional[FeatureTypes],
     enable_categorical: bool,
 ) -> Tuple[ctypes.c_void_p, list, Optional[FeatureNames], Optional[FeatureTypes]]:
+
     try:
-        from cudf.api.types import is_bool_dtype, is_categorical_dtype
+        from cudf.api.types import is_bool_dtype
     except ImportError:
-        from cudf.utils.dtypes import is_categorical_dtype
         from pandas.api.types import is_bool_dtype
 
+    is_categorical_dtype = _get_cudf_cat_predicate()
     # Work around https://github.com/dmlc/xgboost/issues/10181
     if _is_cudf_ser(data):
         if is_bool_dtype(data.dtype):
@@ -941,15 +952,8 @@ def _transform_cudf_df(
             feature_names = [data.name]
         elif lazy_isinstance(data.columns, "cudf.core.multiindex", "MultiIndex"):
             feature_names = [" ".join([str(x) for x in i]) for i in data.columns]
-        elif (
-            lazy_isinstance(data.columns, "cudf.core.index", "RangeIndex")
-            or lazy_isinstance(data.columns, "cudf.core.index", "Int64Index")
-            # Unique to cuDF, no equivalence in pandas 1.3.3
-            or lazy_isinstance(data.columns, "cudf.core.index", "Int32Index")
-        ):
-            feature_names = list(map(str, data.columns))
         else:
-            feature_names = data.columns.format()
+            feature_names = list(data.columns.map(str))
 
     # handle feature types
     if feature_types is None:
diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py
index 8194f5947c16..27be831d3f88 100644
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -280,10 +280,12 @@ def test_pandas_sparse(self):
             }
         )
         y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows)))
-        dtrain = xgb.DMatrix(X, y)
+        with pytest.warns(UserWarning, match="Sparse arrays from pandas"):
+            dtrain = xgb.DMatrix(X, y)
         booster = xgb.train({}, dtrain, num_boost_round=4)
-        predt_sparse = booster.predict(xgb.DMatrix(X))
-        predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense()))
+        with pytest.warns(UserWarning, match="Sparse arrays from pandas"):
+            predt_sparse = booster.predict(xgb.DMatrix(X))
+            predt_dense = booster.predict(xgb.DMatrix(X.sparse.to_dense()))
         np.testing.assert_allclose(predt_sparse, predt_dense)
 
     def test_pandas_label(
@@ -572,14 +574,16 @@ def test_pandas_sparse_column_split(self):
         y = pd.Series(pd.arrays.SparseArray(np.random.randn(rows)))
 
         def verify_pandas_sparse():
-            dtrain = xgb.DMatrix(X, y, data_split_mode=DataSplitMode.COL)
+            with pytest.warns(UserWarning, match="Sparse arrays from pandas"):
+                dtrain = xgb.DMatrix(X, y, data_split_mode=DataSplitMode.COL)
             booster = xgb.train({}, dtrain, num_boost_round=4)
-            predt_sparse = booster.predict(
-                xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)
-            )
-            predt_dense = booster.predict(
-                xgb.DMatrix(X.sparse.to_dense(), data_split_mode=DataSplitMode.COL)
-            )
+            with pytest.warns(UserWarning, match="Sparse arrays from pandas"):
+                predt_sparse = booster.predict(
+                    xgb.DMatrix(X, data_split_mode=DataSplitMode.COL)
+                )
+                predt_dense = booster.predict(
+                    xgb.DMatrix(X.sparse.to_dense(), data_split_mode=DataSplitMode.COL)
+                )
             np.testing.assert_allclose(predt_sparse, predt_dense)
 
         tm.run_with_rabit(world_size=3, test_fn=verify_pandas_sparse)