[dask] Update dask demo for using the new dask backend. #10347

Merged
merged 1 commit on May 31, 2024
33 changes: 18 additions & 15 deletions demo/dask/gpu_training.py
@@ -3,7 +3,7 @@
 ====================================
 """

-import cupy as cp
+import dask
 import dask_cudf
 from dask import array as da
 from dask import dataframe as dd
@@ -24,12 +24,8 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
     # history obtained from evaluation metrics.
     output = dxgb.train(
         client,
-        {
-            "verbosity": 2,
-            "tree_method": "hist",
-            # Golden line for GPU training
-            "device": "cuda",
-        },
+        # Make sure the device is set to CUDA.
+        {"tree_method": "hist", "device": "cuda"},
         dtrain,
         num_boost_round=4,
         evals=[(dtrain, "train")],
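Note on the hunk above: `dxgb.train` returns a dictionary rather than a bare booster; the lines that unpack it are collapsed in this view. A minimal sketch of its use, assuming the `output` variable from the call above:

# `output` bundles the trained booster with the evaluation history from `evals`.
bst = output["booster"]
history = output["history"]
# With the default objective, this prints the per-iteration training RMSE.
print(history["train"]["rmse"])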
@@ -50,18 +46,17 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
     .. versionadded:: 1.2.0

     """
-    X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
-    y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))

     # `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not
     # be used for anything else other than training unless a reference is specified. See
     # the `ref` argument of `DaskQuantileDMatrix`.
     dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
     output = dxgb.train(
         client,
-        {"verbosity": 2, "tree_method": "hist", "device": "cuda"},
+        # Make sure the device is set to CUDA.
+        {"tree_method": "hist", "device": "cuda"},
         dtrain,
         num_boost_round=4,
         evals=[(dtrain, "train")],
     )

     prediction = dxgb.predict(client, output, X)
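To illustrate the `ref` argument referenced in the comment above (a sketch with a hypothetical validation split, not part of this PR): a second `DaskQuantileDMatrix` can reuse the quantile cuts computed for the training matrix, which is what makes the matrix usable beyond training:

dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
# `ref=dtrain` shares the training matrix's histogram bins with the
# validation matrix; X_valid and y_valid are assumed to exist.
dvalid = dxgb.DaskQuantileDMatrix(client, X_valid, y_valid, ref=dtrain)
output = dxgb.train(
    client,
    {"tree_method": "hist", "device": "cuda"},
    dtrain,
    num_boost_round=4,
    evals=[(dtrain, "train"), (dvalid, "valid")],
)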
@@ -72,15 +67,23 @@
 # `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
 # `n_workers` represents the number of GPUs since we use one GPU per worker process.
 with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
-    with Client(cluster) as client:
-        # generate some random data for demonstration
+    # Create client from cluster, set the backend to GPU array (cupy).
+    with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
+        # Generate some random data for demonstration
         rng = da.random.default_rng(1)

-        m = 100000
+        m = 2**18
         n = 100
-        X = rng.normal(size=(m, n))
+        X = rng.uniform(size=(m, n), chunks=(128**2, -1))
         y = X.sum(axis=1)

+        # XGBoost can take arrays. This is to show that DataFrame uses the GPU
+        # backend as well.
         X = dd.from_dask_array(X)
         y = dd.from_dask_array(y)
+        assert isinstance(X, dask_cudf.DataFrame)
+        assert isinstance(y, dask_cudf.Series)

         print("Using DaskQuantileDMatrix")
         from_ddqdm = using_quantile_device_dmatrix(client, X, y)
         print("Using DMatrix")
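For context on the `array.backend` switch introduced in the hunk above: dask dispatches array creation routines to the configured backend, so the same `default_rng` call yields cupy-backed chunks instead of numpy ones. A minimal check, assuming a machine with a GPU and cupy installed:

import dask
from dask import array as da

with dask.config.set({"array.backend": "cupy"}):
    X = da.random.default_rng(1).uniform(size=(1000, 10))

# `_meta` exposes the array library backing each chunk.
print(type(X._meta))  # expected: <class 'cupy.ndarray'>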
19 changes: 11 additions & 8 deletions demo/dask/sklearn_gpu_training.py
@@ -3,6 +3,7 @@
 ===================================================================
 """

+import dask
 from dask import array as da
 from dask.distributed import Client

@@ -13,17 +14,18 @@

 def main(client: Client) -> dxgb.Booster:
-    # generate some random data for demonstration
-    m = 1000000
+    # Generate some random data for demonstration
+    rng = da.random.default_rng(1)
+
+    m = 2**18
     n = 100
-    partition_size = 10000
-    X = da.random.random((m, n), partition_size)
-    y = da.random.random(m, partition_size)
+    X = rng.uniform(size=(m, n), chunks=(128**2, -1))
+    y = X.sum(axis=1)

     regressor = dxgb.DaskXGBRegressor(verbosity=1)
-    # set the device to CUDA
+    # Set the device to CUDA
     regressor.set_params(tree_method="hist", device="cuda")
-    # assigning client here is optional
+    # Assigning client here is optional
     regressor.client = client

     regressor.fit(X, y, eval_set=[(X, y)])
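As a usage note for the hunk above (a sketch, not part of the diff): `DaskXGBRegressor.predict` returns a lazy dask array, so nothing is materialized until explicitly computed:

prediction = regressor.predict(X)
# Materialize only a small slice on the local process.
print(prediction[:5].compute())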
@@ -42,5 +44,6 @@ def main(client: Client) -> dxgb.Booster:
 # With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
 # `LocalCUDACluster` used here is only for demonstration purpose.
 with LocalCUDACluster() as cluster:
-    with Client(cluster) as client:
+    # Create client from cluster, set the backend to GPU array (cupy).
+    with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
         main(client)
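A closing sketch (an assumption, not shown in the PR): since `main` returns the trained booster, it can be saved like any single-node XGBoost model from inside the `Client` context:

booster = main(client)
# The file name here is hypothetical.
booster.save_model("sklearn_gpu_demo.json")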