From e4c7948ee7d5a35f1f7ec2e1e5ebd8c3142105b7 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Sat, 1 Jun 2024 01:30:44 +0800
Subject: [PATCH] Fix warnings in GPU dask tests.

---
 python-package/xgboost/testing/dask.py        | 41 +++++++++++++------
 .../test_gpu_with_dask/test_gpu_with_dask.py  |  6 +--
 .../test_with_dask/test_with_dask.py          | 23 +++++++----
 3 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py
index 70e3dc219928..6841087de0cc 100644
--- a/python-package/xgboost/testing/dask.py
+++ b/python-package/xgboost/testing/dask.py
@@ -1,5 +1,7 @@
 """Tests for dask shared by different test modules."""
 
+from typing import Literal
+
 import numpy as np
 import pandas as pd
 from dask import array as da
@@ -10,19 +12,26 @@
 from xgboost.testing.updater import get_basescore
 
 
-def check_init_estimation_clf(tree_method: str, client: Client) -> None:
+def check_init_estimation_clf(
+    tree_method: str, device: Literal["cpu", "cuda"], client: Client
+) -> None:
     """Test init estimation for classsifier."""
     from sklearn.datasets import make_classification
 
     X, y = make_classification(n_samples=4096 * 2, n_features=32, random_state=1994)
-    clf = xgb.XGBClassifier(n_estimators=1, max_depth=1, tree_method=tree_method)
+    clf = xgb.XGBClassifier(
+        n_estimators=1, max_depth=1, tree_method=tree_method, device=device
+    )
     clf.fit(X, y)
     base_score = get_basescore(clf)
 
     dx = da.from_array(X).rechunk(chunks=(32, None))
     dy = da.from_array(y).rechunk(chunks=(32,))
     dclf = xgb.dask.DaskXGBClassifier(
-        n_estimators=1, max_depth=1, tree_method=tree_method
+        n_estimators=1,
+        max_depth=1,
+        tree_method=tree_method,
+        device=device,
     )
     dclf.client = client
     dclf.fit(dx, dy)
@@ -30,20 +39,24 @@ def check_init_estimation_clf(tree_method: str, client: Client) -> None:
     np.testing.assert_allclose(base_score, dbase_score)
 
 
-def check_init_estimation_reg(tree_method: str, client: Client) -> None:
+def check_init_estimation_reg(
+    tree_method: str, device: Literal["cpu", "cuda"], client: Client
+) -> None:
     """Test init estimation for regressor."""
     from sklearn.datasets import make_regression
 
     # pylint: disable=unbalanced-tuple-unpacking
     X, y = make_regression(n_samples=4096 * 2, n_features=32, random_state=1994)
-    reg = xgb.XGBRegressor(n_estimators=1, max_depth=1, tree_method=tree_method)
+    reg = xgb.XGBRegressor(
+        n_estimators=1, max_depth=1, tree_method=tree_method, device=device
+    )
     reg.fit(X, y)
     base_score = get_basescore(reg)
 
     dx = da.from_array(X).rechunk(chunks=(32, None))
     dy = da.from_array(y).rechunk(chunks=(32,))
     dreg = xgb.dask.DaskXGBRegressor(
-        n_estimators=1, max_depth=1, tree_method=tree_method
+        n_estimators=1, max_depth=1, tree_method=tree_method, device=device
     )
     dreg.client = client
     dreg.fit(dx, dy)
@@ -51,22 +64,26 @@ def check_init_estimation_reg(tree_method: str, client: Client) -> None:
     np.testing.assert_allclose(base_score, dbase_score)
 
 
-def check_init_estimation(tree_method: str, client: Client) -> None:
+def check_init_estimation(
+    tree_method: str, device: Literal["cpu", "cuda"], client: Client
+) -> None:
     """Test init estimation."""
-    check_init_estimation_reg(tree_method, client)
-    check_init_estimation_clf(tree_method, client)
+    check_init_estimation_reg(tree_method, device, client)
+    check_init_estimation_clf(tree_method, device, client)
 
 
-def check_uneven_nan(client: Client, tree_method: str, n_workers: int) -> None:
+def check_uneven_nan(
+    client: Client, tree_method: str, device: Literal["cpu", "cuda"], n_workers: int
+) -> None:
     """Issue #9271, not every worker has missing value."""
     assert n_workers >= 2
 
     with client.as_current():
-        clf = xgb.dask.DaskXGBClassifier(tree_method=tree_method)
+        clf = xgb.dask.DaskXGBClassifier(tree_method=tree_method, device=device)
         X = pd.DataFrame({"a": range(10000), "b": range(10000, 0, -1)})
         y = pd.Series([*[0] * 5000, *[1] * 5000])
 
-        X["a"][:3000:1000] = np.nan
+        X.loc[:3000:1000, "a"] = np.nan
 
         client.wait_for_workers(n_workers=n_workers)
 
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index ced78a84b0d2..905947d874ee 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -230,13 +230,13 @@ def test_boost_from_prediction(self, local_cuda_client: Client) -> None:
         run_boost_from_prediction_multi_class(X, y, "hist", "cuda", local_cuda_client)
 
     def test_init_estimation(self, local_cuda_client: Client) -> None:
-        check_init_estimation("gpu_hist", local_cuda_client)
+        check_init_estimation("hist", "cuda", local_cuda_client)
 
     def test_uneven_nan(self) -> None:
         n_workers = 2
         with LocalCUDACluster(n_workers=n_workers) as cluster:
             with Client(cluster) as client:
-                check_uneven_nan(client, "gpu_hist", n_workers)
+                check_uneven_nan(client, "hist", "cuda", n_workers)
 
     @pytest.mark.skipif(**tm.no_dask_cudf())
     def test_dask_dataframe(self, local_cuda_client: Client) -> None:
@@ -386,7 +386,7 @@ def test_dask_classifier(self, model: str, local_cuda_client: Client) -> None:
         X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X_))
         y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y_))
         w = dask_cudf.from_dask_dataframe(dd.from_dask_array(w_))
-        run_dask_classifier(X, y, w, model, "gpu_hist", local_cuda_client, 10)
+        run_dask_classifier(X, y, w, model, "hist", "cuda", local_cuda_client, 10)
 
     def test_empty_dmatrix(self, local_cuda_client: Client) -> None:
         parameters = {
diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py
index ca55716bbd62..56abccb95ef5 100644
--- a/tests/test_distributed/test_with_dask/test_with_dask.py
+++ b/tests/test_distributed/test_with_dask/test_with_dask.py
@@ -12,7 +12,7 @@
 from math import ceil
 from operator import attrgetter, getitem
 from pathlib import Path
-from typing import Any, Dict, Generator, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Dict, Generator, Literal, Optional, Tuple, Type, TypeVar, Union
 
 import hypothesis
 import numpy as np
@@ -700,6 +700,7 @@ def run_dask_classifier(
     w: xgb.dask._DaskCollection,
     model: str,
     tree_method: Optional[str],
+    device: Literal["cpu", "cuda"],
     client: "Client",
     n_classes,
 ) -> None:
@@ -707,11 +708,19 @@ def run_dask_classifier(
 
     if model == "boosting":
         classifier = xgb.dask.DaskXGBClassifier(
-            verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
+            verbosity=1,
+            n_estimators=2,
+            eval_metric=metric,
+            tree_method=tree_method,
+            device=device,
         )
     else:
         classifier = xgb.dask.DaskXGBRFClassifier(
-            verbosity=1, n_estimators=2, eval_metric=metric, tree_method=tree_method
+            verbosity=1,
+            n_estimators=2,
+            eval_metric=metric,
+            tree_method=tree_method,
+            device=device,
        )
 
     assert classifier._estimator_type == "classifier"
@@ -785,12 +794,12 @@ def test_dask_classifier(model: str, client: "Client") -> None:
     X, y, w = generate_array(with_weights=True)
     y = (y * 10).astype(np.int32)
     assert w is not None
-    run_dask_classifier(X, y, w, model, None, client, 10)
+    run_dask_classifier(X, y, w, model, None, "cpu", client, 10)
 
     y_bin = y.copy()
     y_bin[y > 5] = 1.0
     y_bin[y <= 5] = 0.0
-    run_dask_classifier(X, y_bin, w, model, None, client, 2)
+    run_dask_classifier(X, y_bin, w, model, None, "cpu", client, 2)
 
 
 def test_empty_dmatrix_training_continuation(client: "Client") -> None:
@@ -2136,7 +2145,7 @@ def _() -> xgb.dask.DaskXGBClassifier:
 
 
 def test_init_estimation(client: Client) -> None:
-    check_init_estimation("hist", client)
+    check_init_estimation("hist", "cpu", client)
 
 
 @pytest.mark.parametrize("tree_method", ["hist", "approx"])
@@ -2144,7 +2153,7 @@ def test_uneven_nan(tree_method) -> None:
     n_workers = 2
     with LocalCluster(n_workers=n_workers) as cluster:
         with Client(cluster) as client:
-            check_uneven_nan(client, tree_method, n_workers)
+            check_uneven_nan(client, tree_method, "cpu", n_workers)
 
 
 class TestDaskCallbacks:
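
The recurring substitution throughout this patch is XGBoost's 2.0 device-selection
API: tree_method="gpu_hist" still trains on the GPU but emits a deprecation warning,
and the warning-free equivalent is tree_method="hist" combined with device="cuda",
which keeps the algorithm choice and the execution device as independent settings.
A minimal sketch of the two spellings (illustrative only, not taken from the patch):

    import xgboost as xgb

    # Deprecated spelling: works, but warns on XGBoost >= 2.0.
    # clf = xgb.XGBClassifier(tree_method="gpu_hist")

    # Current spelling: the same histogram algorithm, with the device
    # selected independently ("cpu" or "cuda").
    clf = xgb.XGBClassifier(n_estimators=1, tree_method="hist", device="cuda")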
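
The pandas warning fixed in check_uneven_nan is the chained-assignment pattern:
X["a"][:3000:1000] = np.nan first materializes the intermediate Series X["a"], so
the write may land on a temporary copy; pandas warns about this, and under
copy-on-write (the pandas 3.0 default) the write never reaches X at all. A single
.loc call selects and assigns in one step. A small self-contained sketch, assuming
only a DataFrame with the default RangeIndex as in the test:

    import numpy as np
    import pandas as pd

    X = pd.DataFrame({"a": range(10000), "b": range(10000, 0, -1)})

    # Chained assignment: goes through an intermediate Series, so pandas
    # warns and, under copy-on-write, the DataFrame is left unchanged.
    # X["a"][:3000:1000] = np.nan

    # One-step label-based assignment, as the patch does. Every 1000th row
    # becomes missing; note that .loc slicing includes the endpoint, so
    # labels 0, 1000, 2000, and 3000 are all set.
    X.loc[:3000:1000, "a"] = np.nan
    assert X["a"].isna().sum() == 4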