
Merge branch 'master' of https://github.com/dmlc/xgboost into optimization_part_applysplit
ShvetsKS committed Jun 26, 2022
2 parents 7780172 + 0725fd6 commit 7b7ca83
Showing 197 changed files with 2,981 additions and 3,185 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/r_tests.yml
@@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
config:
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
- {os: ubuntu-latest, r: 'release'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
@@ -49,8 +49,9 @@ jobs:
- name: Run lintr
run: |
cd R-package
R.exe CMD INSTALL .
Rscript.exe tests/helper_scripts/run_lint.R
R CMD INSTALL .
# Disable lintr errors for now: https://github.com/dmlc/xgboost/issues/8012
Rscript tests/helper_scripts/run_lint.R || true
test-with-R:
runs-on: ${{ matrix.config.os }}
9 changes: 9 additions & 0 deletions CMakeLists.txt
@@ -159,6 +159,11 @@ if (USE_OPENMP)
endif (APPLE)
find_package(OpenMP REQUIRED)
endif (USE_OPENMP)
#Add for IBM i
if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> -X64 qc <TARGET> <OBJECTS>")
endif()

if (USE_NCCL)
find_package(Nccl REQUIRED)
@@ -201,6 +206,10 @@ endif (JVM_BINDINGS)
# Plugin
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)

if (PLUGIN_RMM)
find_package(rmm REQUIRED)
endif (PLUGIN_RMM)

#-- library
if (BUILD_STATIC_LIB)
add_library(xgboost STATIC)
4 changes: 2 additions & 2 deletions Jenkinsfile
@@ -397,7 +397,7 @@ def TestCppGPU(args) {
node(nodeReq) {
unstash name: "xgboost_cpp_tests_cuda${artifact_cuda_version}"
unstash name: 'srcs'
echo "Test C++, CUDA ${args.host_cuda_version}"
echo "Test C++, CUDA ${args.host_cuda_version}, rmm: ${args.test_rmm}"
def container_type = "gpu"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
@@ -410,7 +410,7 @@ def TestCppGPU(args) {
docker_binary = "nvidia-docker"
docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool --gtest_filter=-*DeathTest.*"
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate gpu_test && build/testxgboost --use-rmm-pool"
"""
}
deleteDir()
2 changes: 1 addition & 1 deletion R-package/tests/helper_scripts/run_lint.R
@@ -13,7 +13,7 @@ my_linters <- list(
object_usage_linter = lintr::object_usage_linter,
object_length_linter = lintr::object_length_linter,
open_curly_linter = lintr::open_curly_linter,
semicolon = lintr::semicolon_terminator_linter,
semicolon = lintr::semicolon_terminator_linter(semicolon = c("compound", "trailing")),
seq = lintr::seq_linter,
spaces_inside_linter = lintr::spaces_inside_linter,
spaces_left_parentheses_linter = lintr::spaces_left_parentheses_linter,
1 change: 0 additions & 1 deletion amalgamation/xgboost-all0.cc
@@ -55,7 +55,6 @@
#include "../src/tree/tree_updater.cc"
#include "../src/tree/updater_approx.cc"
#include "../src/tree/updater_colmaker.cc"
#include "../src/tree/updater_histmaker.cc"
#include "../src/tree/updater_prune.cc"
#include "../src/tree/updater_quantile_hist.cc"
#include "../src/tree/updater_refresh.cc"
52 changes: 43 additions & 9 deletions cmake/Utils.cmake
@@ -144,6 +144,15 @@ function(xgboost_set_cuda_flags target)
set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")

if (FORCE_COLORED_OUTPUT)
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
target_compile_options(${target} PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fdiagnostics-color=always>)
endif()
endif (FORCE_COLORED_OUTPUT)

if (USE_DEVICE_DEBUG)
target_compile_options(${target} PRIVATE
$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-G;-src-in-ptx>)
@@ -169,10 +178,17 @@ function(xgboost_set_cuda_flags target)
$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
endif (MSVC)

set_target_properties(${target} PROPERTIES
CUDA_STANDARD 14
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF)
if (PLUGIN_RMM)
set_target_properties(${target} PROPERTIES
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF)
else ()
set_target_properties(${target} PROPERTIES
CUDA_STANDARD 14
CUDA_STANDARD_REQUIRED ON
CUDA_SEPARABLE_COMPILATION OFF)
endif (PLUGIN_RMM)
endfunction(xgboost_set_cuda_flags)

macro(xgboost_link_nccl target)
@@ -189,10 +205,18 @@ endmacro(xgboost_link_nccl)

# compile options
macro(xgboost_target_properties target)
set_target_properties(${target} PROPERTIES
CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
if (PLUGIN_RMM)
set_target_properties(${target} PROPERTIES
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
else ()
set_target_properties(${target} PROPERTIES
CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON)
endif (PLUGIN_RMM)

if (HIDE_CXX_SYMBOLS)
#-- Hide all C++ symbols
set_target_properties(${target} PROPERTIES
@@ -204,7 +228,9 @@ macro(xgboost_target_properties target)

if (ENABLE_ALL_WARNINGS)
target_compile_options(${target} PUBLIC
$<IF:$<COMPILE_LANGUAGE:CUDA>,-Xcompiler=-Wall -Xcompiler=-Wextra,-Wall -Wextra>
$<IF:$<COMPILE_LANGUAGE:CUDA>,
-Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
-Wall -Wextra -Wno-expansion-to-defined>
)
endif(ENABLE_ALL_WARNINGS)

@@ -247,6 +273,10 @@ macro(xgboost_target_defs target)
PRIVATE
-DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)

if (PLUGIN_RMM)
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
endif (PLUGIN_RMM)
endmacro(xgboost_target_defs)

# handles dependencies
@@ -269,6 +299,10 @@ macro(xgboost_target_link_libraries target)
xgboost_set_cuda_flags(${target})
endif (USE_CUDA)

if (PLUGIN_RMM)
target_link_libraries(${target} PRIVATE rmm::rmm)
endif (PLUGIN_RMM)

if (USE_NCCL)
xgboost_link_nccl(${target})
endif (USE_NCCL)
11 changes: 11 additions & 0 deletions demo/nvflare/README.md
@@ -3,6 +3,8 @@
This directory contains a demo of Federated Learning using
[NVFlare](https://nvidia.github.io/NVFlare/).

## Training with CPU only

To run the demo, first build XGBoost with the federated learning plugin enabled (see the
[README](../../plugin/federated/README.md)).

@@ -53,3 +55,12 @@ Finally, shutdown everything from the admin CLI:
shutdown client
shutdown server
```

## Training with GPUs

To run the Federated Learning demo with GPUs, make sure your machine has at least 2 GPUs.
Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
turned off (see the [README](../../plugin/federated/README.md)).

Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
above.
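
For reference, the parameter change driven by `use_gpus` amounts to selecting the GPU histogram tree method and pinning each federated worker to the GPU matching its rank. Below is a minimal sketch of that logic in Python; it assumes the federated communicator has already assigned this worker a `rank` and that `dtrain` and `watchlist` are prepared as in the CPU demo:

```python
import xgboost as xgb


def make_params(use_gpus: bool, rank: int) -> dict:
    # Baseline parameters, identical to the CPU-only demo.
    params = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
    if use_gpus:
        # GPU histogram tree method; each worker trains on the GPU matching its rank.
        params["tree_method"] = "gpu_hist"
        params["gpu_id"] = rank
    return params


# Example: the worker with rank 1 trains on GPU 1.
# bst = xgb.train(make_params(use_gpus=True, rank=1), dtrain,
#                 num_boost_round=20, evals=watchlist)
```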
3 changes: 2 additions & 1 deletion demo/nvflare/config/config_fed_client.json
@@ -12,7 +12,8 @@
"world_size": 2,
"server_cert_path": "server-cert.pem",
"client_key_path": "client-key.pem",
"client_cert_path": "client-cert.pem"
"client_cert_path": "client-cert.pem",
"use_gpus": "false"
}
}
}
7 changes: 6 additions & 1 deletion demo/nvflare/custom/trainer.py
@@ -16,7 +16,7 @@ class SupportedTasks(object):

class XGBoostTrainer(Executor):
def __init__(self, server_address: str, world_size: int, server_cert_path: str,
client_key_path: str, client_cert_path: str):
client_key_path: str, client_cert_path: str, use_gpus: bool):
"""Trainer for federated XGBoost.
Args:
@@ -32,6 +32,7 @@ def __init__(self, server_address: str, world_size: int, server_cert_path: str,
self._server_cert_path = server_cert_path
self._client_key_path = client_key_path
self._client_cert_path = client_cert_path
self._use_gpus = use_gpus

def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext,
abort_signal: Signal) -> Shareable:
@@ -66,6 +67,10 @@ def _do_training(self, fl_ctx: FLContext):

# Specify parameters via map, definition are same as c++ version
param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
if self._use_gpus:
self.log_info(fl_ctx, f'Training with GPU {rank}')
param['tree_method'] = 'gpu_hist'
param['gpu_id'] = rank

# Specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
4 changes: 2 additions & 2 deletions doc/build.rst
@@ -136,9 +136,9 @@ From the command line on Linux starting from the XGBoost directory:

To speed up compilation, the compute version specific to your GPU could be passed to cmake as, e.g., ``-DGPU_COMPUTE_VER=50``. A quick explanation and numbers for some architectures can be found `in this page <https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/>`_.

.. note:: Enabling distributed GPU training
.. note:: Faster distributed GPU training with NCCL

By default, distributed GPU training is disabled and only a single GPU will be used. To enable distributed GPU training, set the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.
By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.

.. code-block:: bash
23 changes: 23 additions & 0 deletions doc/contrib/ci.rst
@@ -37,3 +37,26 @@ machine in GitHub Actions, cross-compilation is needed; ``cibuildwheel`` takes c
task of cross-compiling a Python wheel. (Note that ``cibuildwheel`` will call
``setup.py bdist_wheel``. Since XGBoost has a native library component, ``setup.py`` contains
glue code to call CMake and a C++ compiler to build the native library on the fly.)

*******************************
Reproducing errors from Jenkins
*******************************

It is often useful to reproduce the particular testing environment from our Jenkins server for
the purpose of troubleshooting a failing test. We use Docker containers heavily to package
the testing environment, so you can use Docker to reproduce it on your own machine.

1. Install Docker: https://docs.docker.com/engine/install/ubuntu/
2. Install NVIDIA Docker runtime: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian
The runtime lets you access NVIDIA GPUs inside a Docker container.
3. In a build log, all tests are invoked via the wrapper script ``tests/ci_build/ci_build.sh``.
Identify the test you'd like to reproduce locally, and note how the wrapper script was invoked for that test.
The invocation should look like this:

.. code-block:: bash

  CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g' tests/ci_build/ci_build.sh gpu nvidia-docker \
    --build-arg CUDA_VERSION_ARG=11.0 tests/ci_build/test_python.sh mgpu --use-rmm-pool
4. You can now run the same command on your own machine. The wrapper script will automatically download and
set up the correct Docker container(s).
10 changes: 10 additions & 0 deletions doc/jvm/xgboost4j_spark_tutorial.rst
@@ -345,6 +345,16 @@ and then loading the model in another session:
val xgbClassificationModel2 = XGBoostClassificationModel.load(xgbClassificationModelPath)
xgbClassificationModel2.transform(xgbInput)
.. note::

Besides dumping the model in raw format, users are able to save the model in JSON or UBJ format from ``version 2.0.0+``.

.. code-block:: scala

  val xgbClassificationModelPath = "/tmp/xgbClassificationModel"
  xgbClassificationModel.write.overwrite().option("format", "json").save(xgbClassificationModelPath)
With regard to ML pipeline save and load, please refer to the next section.

Interact with Other Bindings of XGBoost
31 changes: 18 additions & 13 deletions doc/parameter.rst
@@ -151,15 +151,6 @@ Parameters for Tree Booster
- ``hist``: Faster histogram optimized approximate greedy algorithm.
- ``gpu_hist``: GPU implementation of ``hist`` algorithm.

* ``sketch_eps`` [default=0.03]

- Only used for ``updater=grow_local_histmaker``.
- This roughly translates into ``O(1 / sketch_eps)`` number of bins.
Compared to directly select number of bins, this comes with theoretical guarantee with sketch accuracy.
- Usually user does not have to tune this.
But consider setting to a lower number for more accurate enumeration of split candidates.
- range: (0, 1)

* ``scale_pos_weight`` [default=1]

- Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: ``sum(negative instances) / sum(positive instances)``. See :doc:`Parameters Tuning </tutorials/param_tuning>` for more discussion. Also, see Higgs Kaggle competition demo for examples: `R <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-train.R>`_, `py1 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-numpy.py>`_, `py2 <https://github.com/dmlc/xgboost/blob/master/demo/kaggle-higgs/higgs-cv.py>`_, `py3 <https://github.com/dmlc/xgboost/blob/master/demo/guide-python/cross_validation.py>`_.
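
  For example, with a 0/1-encoded label vector, the suggested value can be computed directly (a minimal illustration, not taken from the demos above):

  .. code-block:: python

    import numpy as np

    y = np.array([0, 0, 0, 0, 1])  # toy labels: 4 negatives, 1 positive
    scale_pos_weight = float((y == 0).sum()) / float((y == 1).sum())  # 4.0 here
    params = {"objective": "binary:logistic", "scale_pos_weight": scale_pos_weight}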
@@ -170,7 +161,6 @@ Parameters for Tree Booster

- ``grow_colmaker``: non-distributed column-based construction of trees.
- ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.
- ``grow_local_histmaker``: based on local histogram counting.
- ``grow_quantile_histmaker``: Grow tree using quantized histogram.
- ``grow_gpu_hist``: Grow tree with GPU.
- ``sync``: synchronizes trees in all distributed nodes.
@@ -235,21 +225,36 @@ Parameters for Tree Booster
list is a group of indices of features that are allowed to interact with each other.
See :doc:`/tutorials/feature_interaction_constraint` for more information.

Additional parameters for ``hist``, ``gpu_hist`` and ``approx`` tree method
===========================================================================
.. _cat-param:

Parameters for Categorical Feature
==================================

These parameters are only used for training with categorical data. See
:doc:`/tutorials/categorical` for more information. A short usage sketch follows the
parameter list below.

* ``max_cat_to_onehot``

.. versionadded:: 1.6

.. note:: The support for this parameter is experimental.
.. note:: This parameter is experimental. ``exact`` tree method is not supported yet.

- A threshold for deciding whether XGBoost should use one-hot encoding based splits for
categorical data. When the number of categories is less than the threshold, one-hot
encoding is chosen; otherwise the categories will be partitioned into children nodes.
Only relevant for regression and binary classification. Also, the ``exact`` tree method
is not supported.

* ``max_cat_threshold``

.. versionadded:: 2.0

.. note:: This parameter is experimental. ``exact`` and ``gpu_hist`` tree methods are
not supported yet.

- Maximum number of categories considered for each split. Used only by partition-based
splits for preventing over-fitting.
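
As a usage sketch covering both parameters (the values 16 and 64 are illustrative, not
defaults, and ``max_cat_threshold`` additionally requires XGBoost 2.0+):

.. code-block:: python

  import numpy as np
  import pandas as pd
  import xgboost as xgb

  rng = np.random.default_rng(0)
  # Toy data with one categorical and one numerical feature.
  X = pd.DataFrame({
      "cat": pd.Categorical(rng.choice(["a", "b", "c", "d"], size=256)),
      "num": rng.normal(size=256),
  })
  y = rng.integers(0, 2, size=256)

  dtrain = xgb.DMatrix(X, label=y, enable_categorical=True)
  params = {
      "tree_method": "hist",
      "objective": "binary:logistic",
      "max_cat_to_onehot": 16,   # one-hot splits for features with fewer than 16 categories
      "max_cat_threshold": 64,   # cap on categories considered per partition-based split
  }
  bst = xgb.train(params, dtrain, num_boost_round=10)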

Additional parameters for Dart Booster (``booster=dart``)
=========================================================
