diff --git a/.github/workflows/i386.yml b/.github/workflows/i386.yml
index 1c4e98010310..de8d7b25bac7 100644
--- a/.github/workflows/i386.yml
+++ b/.github/workflows/i386.yml
@@ -19,7 +19,7 @@ jobs:
ports:
- 5000:5000
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Set up Docker Buildx
diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index 9ef314ca5b0b..c2b6ca6a7483 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -16,26 +16,23 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [windows-latest, ubuntu-latest, macos-11]
+ os: [windows-latest, ubuntu-latest, macos-13]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7 # v1.8.1
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- micromamba-version: '1.5.6-0'
- environment-name: jvm_tests
- create-args: >-
- python=3.10
- awscli
- cache-downloads: true
- cache-environment: true
- init-shell: bash powershell
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: jvm_tests
+ environment-file: tests/ci_build/conda_env/jvm_tests.yml
+ use-mamba: true
- name: Cache Maven packages
- uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }}
@@ -61,7 +58,7 @@ jobs:
id: extract_branch
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
- (matrix.os == 'windows-latest' || matrix.os == 'macos-11')
+ (matrix.os == 'windows-latest' || matrix.os == 'macos-13')
- name: Publish artifact xgboost4j.dll to S3
run: |
@@ -85,7 +82,7 @@ jobs:
python -m awscli s3 cp libxgboost4j_${{ github.sha }}.dylib s3://xgboost-nightly-builds/${{ steps.extract_branch.outputs.branch }}/libxgboost4j/ --acl public-read --region us-west-2
if: |
(github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')) &&
- matrix.os == 'macos-11'
+ matrix.os == 'macos-13'
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 4755f9aaaad8..f5ecb94f68b1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -21,9 +21,9 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [macos-11]
+ os: [macos-12]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Install system packages
@@ -33,7 +33,7 @@ jobs:
run: |
mkdir build
cd build
- cmake .. -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON
+ cmake .. -DGOOGLE_TEST=ON -DUSE_OPENMP=ON -DUSE_DMLC_GTEST=ON -GNinja -DBUILD_DEPRECATED_CLI=ON -DUSE_SANITIZER=ON -DENABLED_SANITIZERS=address -DCMAKE_BUILD_TYPE=RelWithDebInfo
ninja -v
- name: Run gtest binary
run: |
@@ -49,7 +49,7 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Install system packages
@@ -76,16 +76,16 @@ jobs:
os: [ubuntu-latest]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: linux_sycl_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: linux_sycl_test
environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
-
+ use-mamba: true
- name: Display Conda env
run: |
conda info
@@ -118,15 +118,16 @@ jobs:
os: ["ubuntu-latest"]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: cpp_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: cpp_test
environment-file: tests/ci_build/conda_env/cpp_test.yml
+ use-mamba: true
- name: Display Conda env
run: |
conda info
@@ -155,8 +156,9 @@ jobs:
- name: Build and install XGBoost shared library
run: |
cd build
- cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja
+ cmake .. -DBUILD_STATIC_LIB=OFF -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -GNinja -DPLUGIN_FEDERATED=ON -DGOOGLE_TEST=ON
ninja -v install
+ ./testxgboost
cd -
- name: Build and run C API demo with shared
run: |
@@ -175,10 +177,10 @@ jobs:
runs-on: ubuntu-latest
name: Code linting for C++
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: "3.8"
architecture: 'x64'
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
index f0cad6382d87..e6eec86c8606 100644
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@@ -21,15 +21,16 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: python_lint
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: python_lint
environment-file: tests/ci_build/conda_env/python_lint.yml
+ use-mamba: true
- name: Display Conda env
run: |
conda info
@@ -52,15 +53,16 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: sdist_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: sdist_test
environment-file: tests/ci_build/conda_env/sdist_test.yml
+ use-mamba: true
- name: Display Conda env
run: |
conda info
@@ -81,14 +83,14 @@ jobs:
name: Test installing XGBoost Python source package on ${{ matrix.os }}
strategy:
matrix:
- os: [macos-11, windows-latest]
+ os: [macos-13, windows-latest]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Install osx system dependencies
- if: matrix.os == 'macos-11'
+ if: matrix.os == 'macos-13'
run: |
brew install ninja libomp
- uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
@@ -119,19 +121,20 @@ jobs:
strategy:
matrix:
config:
- - {os: macos-11}
+ - {os: macos-13}
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: macos_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: macos_cpu_test
environment-file: tests/ci_build/conda_env/macos_cpu_test.yml
+ use-mamba: true
- name: Display Conda env
run: |
@@ -174,7 +177,7 @@ jobs:
- {os: windows-latest, python-version: '3.8'}
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
@@ -218,16 +221,17 @@ jobs:
- {os: ubuntu-latest, python-version: "3.8"}
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: linux_cpu_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: linux_cpu_test
environment-file: tests/ci_build/conda_env/linux_cpu_test.yml
+ use-mamba: true
- name: Display Conda env
run: |
@@ -270,16 +274,17 @@ jobs:
- {os: ubuntu-latest, python-version: "3.8"}
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
- cache-downloads: true
- cache-env: true
- environment-name: linux_sycl_test
+ miniforge-variant: Mambaforge
+ miniforge-version: latest
+ activate-environment: linux_sycl_test
environment-file: tests/ci_build/conda_env/linux_sycl_test.yml
+ use-mamba: true
- name: Display Conda env
run: |
@@ -309,12 +314,12 @@ jobs:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Set up Python 3.8
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+ uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: 3.8
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
index 090b1f830213..f3e7d5817479 100644
--- a/.github/workflows/python_wheels.yml
+++ b/.github/workflows/python_wheels.yml
@@ -25,10 +25,10 @@ jobs:
- os: macos-14
platform_id: macosx_arm64
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- - uses: conda-incubator/setup-miniconda@v3.0.4
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
miniforge-variant: Mambaforge
miniforge-version: latest
diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml
index 887470190035..4b506927e06c 100644
--- a/.github/workflows/r_nold.yml
+++ b/.github/workflows/r_nold.yml
@@ -27,7 +27,7 @@ jobs:
run: |
apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml
index f3d83b823aff..9fb9d4684ad1 100644
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@@ -25,7 +25,7 @@ jobs:
RSPM: ${{ matrix.config.rspm }}
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
@@ -34,7 +34,7 @@ jobs:
r-version: ${{ matrix.config.r }}
- name: Cache R packages
- uses: actions/cache@937d24475381cd9c75ae6db12cb4e79714b926ed # v3.0.11
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
@@ -69,7 +69,7 @@ jobs:
sudo apt update
sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
if: matrix.config.os == 'ubuntu-latest'
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
@@ -78,13 +78,13 @@ jobs:
r-version: ${{ matrix.config.r }}
- name: Cache R packages
- uses: actions/cache@937d24475381cd9c75ae6db12cb4e79714b926ed # v3.0.11
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
- - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+ - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: "3.8"
architecture: 'x64'
@@ -123,7 +123,7 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 4651e2ac0dff..222700da4a58 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -22,7 +22,7 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
persist-credentials: false
@@ -41,7 +41,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
+ uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
with:
name: SARIF file
path: results.sarif
diff --git a/.github/workflows/update_rapids.yml b/.github/workflows/update_rapids.yml
index 9f9c85f62e28..9490926cfcaf 100644
--- a/.github/workflows/update_rapids.yml
+++ b/.github/workflows/update_rapids.yml
@@ -25,7 +25,7 @@ jobs:
name: Check latest RAPIDS
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
submodules: 'true'
- name: Check latest RAPIDS and update conftest.sh
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index fb7c8dbe69e7..e7fa372d89f9 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -12,7 +12,7 @@ submodules:
build:
os: ubuntu-22.04
tools:
- python: "3.8"
+ python: "3.10"
apt_packages:
- graphviz
- cmake
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c69b0d2a3dc7..e718d88ab1c2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ if(PLUGIN_SYCL)
string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
-project(xgboost LANGUAGES CXX C VERSION 2.1.0)
+project(xgboost LANGUAGES CXX C VERSION 2.2.0)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
@@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
PRIVATE
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
- ${xgboost_SOURCE_DIR}/rabit/include
)
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
xgboost_target_properties(runxgboost)
diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt
index 37c5dbf4c1ed..75c3e2d77449 100644
--- a/R-package/CMakeLists.txt
+++ b/R-package/CMakeLists.txt
@@ -29,7 +29,6 @@ target_compile_definitions(
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1
- -DRABIT_STRICT_CXX98_
)
target_include_directories(
@@ -37,7 +36,6 @@ target_include_directories(
${LIBR_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
- ${PROJECT_SOURCE_DIR}/rabit/include
)
target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index b4072aff0b41..82d7011de3a4 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
-Version: 2.1.0.0
-Date: 2023-08-19
+Version: 2.2.0.0
+Date: 2024-06-03
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 7b6a20f704dd..69f358751dc8 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -27,8 +27,7 @@ NVL <- function(x, val) {
}
.RANKING_OBJECTIVES <- function() {
- return(c('binary:logistic', 'binary:logitraw', 'binary:hinge', 'multi:softmax',
- 'multi:softprob'))
+ return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
}
@@ -213,7 +212,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) {
res <- sapply(seq_along(evals), function(j) {
w <- evals[[j]]
## predict using all trees
- preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
+ preds <- predict(bst, w, outputmargin = TRUE, reshape = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 77d75fa9c2a5..77b33f16db44 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -249,7 +249,7 @@ xgb.get.handle <- function(object) {
#' summary(rowSums(pred_contr) - qlogis(pred))
#' # for the 1st record, let's inspect its features that had non-zero contribution to prediction:
#' contr1 <- pred_contr[1,]
-#' contr1 <- contr1[-length(contr1)] # drop BIAS
+#' contr1 <- contr1[-length(contr1)] # drop intercept
#' contr1 <- contr1[contr1 != 0] # drop non-contributing features
#' contr1 <- contr1[order(abs(contr1))] # order by contribution magnitude
#' old_mar <- par("mar")
@@ -473,7 +473,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
.Call(XGSetArrayDimInplace_R, arr, rev(shape))
- cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
+ cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL
n_groups <- shape[2]
## Needed regardless of whether strict shape is being used.
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 4cea088e0e45..0aa3cdcf1df0 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -122,11 +122,23 @@
#' printed out during the training.
#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
#' the performance of each round's model on mat1 and mat2.
-#' @param obj customized objective function. Returns gradient and second order
-#' gradient with given prediction and dtrain.
-#' @param feval customized evaluation function. Returns
-#' \code{list(metric='metric-name', value='metric-value')} with given
-#' prediction and dtrain.
+#' @param obj customized objective function. Should take two arguments: the first one will be the
+#' current predictions (either a numeric vector or matrix depending on the number of targets / classes),
+#' and the second one will be the `data` DMatrix object that is used for training.
+#'
+#' It should return a list with two elements `grad` and `hess` (in that order), as either
+#' numeric vectors or numeric matrices depending on the number of targets / classes (same
+#' dimension as the predictions that are passed as first argument).
+#' @param feval customized evaluation function. Just like `obj`, should take two arguments, with
+#' the first one being the predictions and the second one the `data` DMatrix.
+#'
+#' Should return a list with two elements `metric` (name that will be displayed for this metric,
+#' should be a string / character), and `value` (the number that the function calculates, should
+#' be a numeric scalar).
+#'
+#' Note that even if passing `feval`, objectives also have an associated default metric that
+#' will be evaluated in addition to it. In order to disable the built-in metric, one can pass
+#' parameter `disable_default_eval_metric = TRUE`.
#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
#' If 2, some additional information will be printed out.
#' Note that setting \code{verbose > 0} automatically engages the
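The roxygen text above documents the R contract for `obj` and `feval`. As a cross-reference, a minimal Python sketch of the same contract in the Python package, where the objective returns a `(grad, hess)` tuple and the metric a `(name, value)` pair; the squared-log-error math follows the official custom-objective tutorial, and the toy data is made up:

```python
import numpy as np
import xgboost as xgb

def squared_log(predt: np.ndarray, dtrain: xgb.DMatrix):
    """Objective: gradient and Hessian of squared log error, same shape as predt."""
    y = dtrain.get_label()
    predt[predt < -1] = -1 + 1e-6
    grad = (np.log1p(predt) - np.log1p(y)) / (predt + 1)
    hess = (-np.log1p(predt) + np.log1p(y) + 1) / np.power(predt + 1, 2)
    return grad, hess

def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix):
    """Metric: returns a (name, value) pair, the analogue of R's list(metric=, value=)."""
    y = dtrain.get_label()
    predt[predt < -1] = -1 + 1e-6
    return "my-rmsle", float(np.sqrt(np.mean(np.power(np.log1p(predt) - np.log1p(y), 2))))

rng = np.random.default_rng(0)
X = rng.uniform(size=(256, 16))
y = X.sum(axis=1)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train(
    # disable_default_eval_metric turns off the objective's built-in metric.
    {"tree_method": "hist", "disable_default_eval_metric": 1},
    dtrain,
    num_boost_round=4,
    obj=squared_log,
    custom_metric=rmsle,
    evals=[(dtrain, "train")],
)
```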
diff --git a/R-package/configure b/R-package/configure
index 3bbfa71503fb..395ea9ee5d5d 100755
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for xgboost 2.1.0.
+# Generated by GNU Autoconf 2.71 for xgboost 2.2.0.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='2.1.0'
-PACKAGE_STRING='xgboost 2.1.0'
+PACKAGE_VERSION='2.2.0'
+PACKAGE_STRING='xgboost 2.2.0'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures xgboost 2.1.0 to adapt to many kinds of systems.
+\`configure' configures xgboost 2.2.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1287,7 +1287,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of xgboost 2.1.0:";;
+ short | recursive ) echo "Configuration of xgboost 2.2.0:";;
esac
cat <<\_ACEOF
@@ -1367,7 +1367,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-xgboost configure 2.1.0
+xgboost configure 2.2.0
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by xgboost $as_me 2.1.0, which was
+It was created by xgboost $as_me 2.2.0, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by xgboost $as_me 2.1.0, which was
+This file was extended by xgboost $as_me 2.2.0, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
-xgboost config.status 2.1.0
+xgboost config.status 2.2.0
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
diff --git a/R-package/configure.ac b/R-package/configure.ac
index 89f8635fe315..ee9ce823a0c1 100644
--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
-AC_INIT([xgboost],[2.1.0],[],[xgboost],[])
+AC_INIT([xgboost],[2.2.0],[],[xgboost],[])
: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then
diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd
index 88a2f203efcd..9c2e434d0625 100644
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@@ -211,7 +211,7 @@ str(pred_contr)
summary(rowSums(pred_contr) - qlogis(pred))
# for the 1st record, let's inspect its features that had non-zero contribution to prediction:
contr1 <- pred_contr[1,]
-contr1 <- contr1[-length(contr1)] # drop BIAS
+contr1 <- contr1[-length(contr1)] # drop intercept
contr1 <- contr1[contr1 != 0] # drop non-contributing features
contr1 <- contr1[order(abs(contr1))] # order by contribution magnitude
old_mar <- par("mar")
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index 21c8dbe16413..937020e0dd38 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -167,12 +167,26 @@ printed out during the training.
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
the performance of each round's model on mat1 and mat2.}
-\item{obj}{customized objective function. Returns gradient and second order
-gradient with given prediction and dtrain.}
+\item{obj}{customized objective function. Should take two arguments: the first one will be the
+current predictions (either a numeric vector or matrix depending on the number of targets / classes),
+and the second one will be the \code{data} DMatrix object that is used for training.
-\item{feval}{customized evaluation function. Returns
-\code{list(metric='metric-name', value='metric-value')} with given
-prediction and dtrain.}
+\if{html}{\out{<div class="sourceCode">}}\preformatted{ It should return a list with two elements `grad` and `hess` (in that order), as either
+ numeric vectors or numeric matrices depending on the number of targets / classes (same
+ dimension as the predictions that are passed as first argument).
+}\if{html}{\out{</div>}}}
+
+\item{feval}{customized evaluation function. Just like \code{obj}, should take two arguments, with
+the first one being the predictions and the second one the \code{data} DMatrix.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{ Should return a list with two elements `metric` (name that will be displayed for this metric,
+ should be a string / character), and `value` (the number that the function calculates, should
+ be a numeric scalar).
+
+ Note that even if passing `feval`, objectives also have an associated default metric that
+ will be evaluated in addition to it. In order to disable the built-in metric, one can pass
+ parameter `disable_default_eval_metric = TRUE`.
+}\if{html}{\out{</div>}}}
\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
If 2, some additional information will be printed out.
diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 93cfb8e5b4c1..0cabffcad3c8 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
- -I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
index f160930e8a4a..c49006c5e0a6 100644
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
- -I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index d3050b152aa0..cf3a347d4d9d 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -147,3 +147,34 @@ test_that("custom objective with multi-class works", {
expect_equal(custom_predt, builtin_predt)
})
+
+test_that("custom metric with multi-target passes reshaped data to feval", {
+ x <- as.matrix(iris[, -5])
+ y <- as.numeric(iris$Species) - 1
+ dtrain <- xgb.DMatrix(data = x, label = y)
+
+ multinomial.ll <- function(predt, dtrain) {
+ expect_equal(dim(predt), c(nrow(iris), 3L))
+ y <- getinfo(dtrain, "label")
+ probs <- apply(predt, 1, softmax) |> t()
+ probs.y <- probs[cbind(seq(1L, nrow(predt)), y + 1L)]
+ ll <- sum(log(probs.y))
+ return(list(metric = "multinomial-ll", value = -ll))
+ }
+
+ model <- xgb.train(
+ params = list(
+ objective = "multi:softmax",
+ num_class = 3L,
+ base_score = 0,
+ disable_default_eval_metric = TRUE,
+ max_depth = 123,
+ seed = 123
+ ),
+ data = dtrain,
+ nrounds = 2L,
+ evals = list(Train = dtrain),
+ eval_metric = multinomial.ll,
+ verbose = 0
+ )
+})
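A rough Python counterpart of the new R test, assuming the Python-side convention that `custom_metric` receives transformed predictions, which for `multi:softprob` is a probability matrix shaped `(n_samples, n_classes)`; the toy data is made up:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.uniform(size=(150, 4))
y = rng.integers(0, 3, size=150).astype(np.float64)
dtrain = xgb.DMatrix(X, label=y)

def multinomial_ll(predt: np.ndarray, dtrain: xgb.DMatrix):
    # One probability column per class, mirroring the reshape checked in the R test.
    assert predt.shape == (150, 3)
    labels = dtrain.get_label().astype(int)
    ll = np.log(predt[np.arange(predt.shape[0]), labels]).sum()
    return "multinomial-ll", float(-ll)

xgb.train(
    {"objective": "multi:softprob", "num_class": 3, "disable_default_eval_metric": 1},
    dtrain,
    num_boost_round=2,
    evals=[(dtrain, "Train")],
    custom_metric=multinomial_ll,
)
```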
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 38b5ca0667bf..c619bc50b5da 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -101,7 +101,7 @@ test_that("predict feature contributions works", {
# gbtree binary classifier
expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE), regexp = NA)
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
- expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
+ expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "(Intercept)"))
pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# must work with data that has no column names
@@ -114,14 +114,14 @@ test_that("predict feature contributions works", {
# gbtree binary classifier (approximate method)
expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
- expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
+ expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "(Intercept)"))
pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# gblinear binary classifier
expect_error(pred_contr <- predict(bst.GLM, sparse_matrix, predcontrib = TRUE), regexp = NA)
expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
- expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
+ expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "(Intercept)"))
pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# manual calculation of linear terms
@@ -137,7 +137,7 @@ test_that("predict feature contributions works", {
expect_is(pred_contr, "list")
expect_length(pred_contr, 3)
for (g in seq_along(pred_contr)) {
- expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
+ expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5)
}
@@ -151,7 +151,7 @@ test_that("predict feature contributions works", {
byrow = TRUE
)
for (g in seq_along(pred_contr)) {
- expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
+ expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
# manual calculation of linear terms
coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R
index 645efc12a14c..60cf9d80039a 100644
--- a/R-package/tests/testthat/test_interactions.R
+++ b/R-package/tests/testthat/test_interactions.R
@@ -48,7 +48,7 @@ test_that("predict feature interactions works", {
intr <- predict(b, dm, predinteraction = TRUE)
expect_equal(dim(intr), c(N, P + 1, P + 1))
# check assigned colnames
- cn <- c(letters[1:P], "BIAS")
+ cn <- c(letters[1:P], "(Intercept)")
expect_equal(dimnames(intr), list(NULL, cn, cn))
# check the symmetry
@@ -60,7 +60,7 @@ test_that("predict feature interactions works", {
# diagonal terms for features 3,4,5 must be close to zero
expect_lt(Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))), 0.05)
- # BIAS must have no interactions
+ # Intercept must have no interactions
expect_lt(max(abs(intr[, 1:P, P + 1])), 0.00001)
# interactions other than 2 x 3 must be close to zero
diff --git a/README.md b/README.md
index 234bd7dba76e..220e94637fe1 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-
eXtreme Gradient Boosting
+
eXtreme Gradient Boosting
===========
[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci)
@@ -11,6 +11,7 @@
[![Optuna](https://img.shields.io/badge/Optuna-integrated-blue)](https://optuna.org)
[![Twitter](https://img.shields.io/badge/@XGBoostProject--_.svg?style=social&logo=twitter)](https://twitter.com/XGBoostProject)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/dmlc/xgboost/badge)](https://api.securityscorecards.dev/projects/github.com/dmlc/xgboost)
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/comet-ml/comet-examples/blob/master/integrations/model-training/xgboost/notebooks/how_to_use_comet_with_xgboost_tutorial.ipynb)
[Community](https://xgboost.ai/community) |
[Documentation](https://xgboost.readthedocs.org) |
@@ -49,6 +50,7 @@ Become a sponsor and get a logo here. See details at [Sponsoring the XGBoost Pro
+
### Backers
[[Become a backer](https://opencollective.com/xgboost#backer)]
diff --git a/demo/c-api/basic/Makefile b/demo/c-api/basic/Makefile
index 345079fa9a75..dceb9bc73a11 100644
--- a/demo/c-api/basic/Makefile
+++ b/demo/c-api/basic/Makefile
@@ -4,7 +4,7 @@ TGT=c-api-demo
cc=cc
CFLAGS ?=-O3
XGBOOST_ROOT ?=../..
-INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
+INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib
build: $(TGT)
diff --git a/demo/dask/gpu_training.py b/demo/dask/gpu_training.py
index f53835ffbee9..d964d78e20aa 100644
--- a/demo/dask/gpu_training.py
+++ b/demo/dask/gpu_training.py
@@ -3,7 +3,7 @@
====================================
"""
-import cupy as cp
+import dask
import dask_cudf
from dask import array as da
from dask import dataframe as dd
@@ -24,12 +24,8 @@ def using_dask_matrix(client: Client, X: da.Array, y: da.Array) -> da.Array:
# history obtained from evaluation metrics.
output = dxgb.train(
client,
- {
- "verbosity": 2,
- "tree_method": "hist",
- # Golden line for GPU training
- "device": "cuda",
- },
+ # Make sure the device is set to CUDA.
+ {"tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
evals=[(dtrain, "train")],
@@ -50,18 +46,17 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
.. versionadded:: 1.2.0
"""
- X = dask_cudf.from_dask_dataframe(dd.from_dask_array(X))
- y = dask_cudf.from_dask_dataframe(dd.from_dask_array(y))
-
# `DaskQuantileDMatrix` is used instead of `DaskDMatrix`, be careful that it can not
# be used for anything else other than training unless a reference is specified. See
# the `ref` argument of `DaskQuantileDMatrix`.
dtrain = dxgb.DaskQuantileDMatrix(client, X, y)
output = dxgb.train(
client,
- {"verbosity": 2, "tree_method": "hist", "device": "cuda"},
+ # Make sure the device is set to CUDA.
+ {"tree_method": "hist", "device": "cuda"},
dtrain,
num_boost_round=4,
+ evals=[(dtrain, "train")],
)
prediction = dxgb.predict(client, output, X)
@@ -72,15 +67,23 @@ def using_quantile_device_dmatrix(client: Client, X: da.Array, y: da.Array) -> d
# `LocalCUDACluster` is used for assigning GPU to XGBoost processes. Here
# `n_workers` represents the number of GPUs since we use one GPU per worker process.
with LocalCUDACluster(n_workers=2, threads_per_worker=4) as cluster:
- with Client(cluster) as client:
- # generate some random data for demonstration
+ # Create client from cluster, set the backend to GPU array (cupy).
+ with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
+ # Generate some random data for demonstration
rng = da.random.default_rng(1)
- m = 100000
+ m = 2**18
n = 100
- X = rng.normal(size=(m, n))
+ X = rng.uniform(size=(m, n), chunks=(128**2, -1))
y = X.sum(axis=1)
+ X = dd.from_dask_array(X)
+ y = dd.from_dask_array(y)
+ # XGBoost can take arrays. This is to show that DataFrame uses the GPU
+ # backend as well.
+ assert isinstance(X, dask_cudf.DataFrame)
+ assert isinstance(y, dask_cudf.Series)
+
print("Using DaskQuantileDMatrix")
from_ddqdm = using_quantile_device_dmatrix(client, X, y)
print("Using DMatrix")
diff --git a/demo/dask/sklearn_gpu_training.py b/demo/dask/sklearn_gpu_training.py
index 6161bf9a3402..56f1be7151c4 100644
--- a/demo/dask/sklearn_gpu_training.py
+++ b/demo/dask/sklearn_gpu_training.py
@@ -3,6 +3,7 @@
===================================================================
"""
+import dask
from dask import array as da
from dask.distributed import Client
@@ -13,17 +14,18 @@
def main(client: Client) -> dxgb.Booster:
- # generate some random data for demonstration
+ # Generate some random data for demonstration
+ rng = da.random.default_rng(1)
+
+ m = 2**18
n = 100
- m = 1000000
- partition_size = 10000
- X = da.random.random((m, n), partition_size)
- y = da.random.random(m, partition_size)
+ X = rng.uniform(size=(m, n), chunks=(128**2, -1))
+ y = X.sum(axis=1)
regressor = dxgb.DaskXGBRegressor(verbosity=1)
- # set the device to CUDA
+ # Set the device to CUDA
regressor.set_params(tree_method="hist", device="cuda")
- # assigning client here is optional
+ # Assigning client here is optional
regressor.client = client
regressor.fit(X, y, eval_set=[(X, y)])
@@ -42,5 +44,6 @@ def main(client: Client) -> dxgb.Booster:
# With dask cuda, one can scale up XGBoost to arbitrary GPU clusters.
# `LocalCUDACluster` used here is only for demonstration purpose.
with LocalCUDACluster() as cluster:
- with Client(cluster) as client:
+ # Create client from cluster, set the backend to GPU array (cupy).
+ with Client(cluster) as client, dask.config.set({"array.backend": "cupy"}):
main(client)
diff --git a/dev/release-artifacts.py b/dev/release-artifacts.py
index d5f28f6fc0ca..9f27d35738c6 100644
--- a/dev/release-artifacts.py
+++ b/dev/release-artifacts.py
@@ -230,7 +230,6 @@ def release_note(
) -> None:
"""Generate a note for GitHub release description."""
r_gpu_linux_url = r_urls["linux"]
- r_gpu_win64_url = r_urls["win64"]
src_tarball = (
f"https://github.com/dmlc/xgboost/releases/download/v{release}/{tarname}"
)
@@ -251,7 +250,6 @@ def release_note(
**Experimental binary packages for R with CUDA enabled**
* xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})
-* xgboost_r_gpu_win64_{release}.tar.gz: [Download]({r_gpu_win64_url})
**Source tarball**
* xgboost.tar.gz: [Download]({src_tarball})"""
@@ -297,6 +295,8 @@ def main(args: argparse.Namespace) -> None:
commit_hash = latest_hash()
outdir = os.path.abspath(args.outdir)
+ if outdir.find(str(ROOT)) != -1:
+ raise ValueError("output dir must be outside of the source tree.")
if not os.path.exists(outdir):
os.mkdir(outdir)
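The new guard uses a substring test, which can misfire on sibling directories whose names share a prefix with the source tree (e.g. `/tmp/xgboost-out` vs a `ROOT` of `/tmp/xgboost`). A stricter sketch, assuming Python >= 3.9 for `Path.is_relative_to`; not part of this patch:

```python
from pathlib import Path

def ensure_outside_source_tree(outdir: str, root: str) -> None:
    # Resolve both paths, then test true containment rather than substring match.
    if Path(outdir).resolve().is_relative_to(Path(root).resolve()):
        raise ValueError("output dir must be outside of the source tree.")
```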
diff --git a/doc/build.rst b/doc/build.rst
index cba75ff57d2b..fda64820ad04 100644
--- a/doc/build.rst
+++ b/doc/build.rst
@@ -134,11 +134,11 @@ From the command line on Linux starting from the XGBoost directory:
.. note:: Specifying compute capability
- To speed up compilation, the compute version specific to your GPU could be passed to cmake as, e.g., ``-DGPU_COMPUTE_VER=50``. A quick explanation and numbers for some architectures can be found `in this page <https://developer.nvidia.com/cuda-gpus>`_.
+ To speed up compilation, the compute version specific to your GPU could be passed to cmake as, e.g., ``-DCMAKE_CUDA_ARCHITECTURES=75``. A quick explanation and numbers for some architectures can be found `in this page <https://developer.nvidia.com/cuda-gpus>`_.
.. note:: Faster distributed GPU training with NCCL
- By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
+ By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.
.. code-block:: bash
@@ -147,6 +147,8 @@ From the command line on Linux starting from the XGBoost directory:
cmake .. -DUSE_CUDA=ON -DUSE_NCCL=ON -DNCCL_ROOT=/path/to/nccl2
make -j4
+Some additional flags are available for NCCL: ``BUILD_WITH_SHARED_NCCL`` enables building XGBoost with NCCL as a shared library, while ``USE_DLOPEN_NCCL`` enables XGBoost to load NCCL at runtime using ``dlopen``.
+
On Windows, run CMake as follows:
.. code-block:: bash
@@ -165,6 +167,17 @@ The above cmake configuration run will create an ``xgboost.sln`` solution file i
To speed up compilation, run multiple jobs in parallel by appending option ``-- /MP``.
+Federated Learning
+==================
+
+The federated learning plugin requires ``grpc`` and ``protobuf``. To install grpc, refer
+to the `installation guide from the gRPC website
+`_. Alternatively, one can use the
+``libgrpc`` and ``protobuf`` packages from conda-forge if conda is available. After
+obtaining the required dependencies, enable the flag ``-DPLUGIN_FEDERATED=ON`` when running
+CMake. Please note that only Linux is supported for the federated plugin.
+
+
.. _build_python:
***********************************
@@ -228,11 +241,12 @@ There are several ways to build and install the package from source:
3. Editable installation
- To further enable rapid development and iteration, we provide an **editable installation**.
- In an editable installation, the installed package is simply a symbolic link to your
- working copy of the XGBoost source code. So every changes you make to your source
- directory will be immediately visible to the Python interpreter. Here is how to
- install XGBoost as editable installation:
+ To further enable rapid development and iteration, we provide an **editable
+ installation**. In an editable installation, the installed package is simply a symbolic
+ link to your working copy of the XGBoost source code. So every change you make to your
+ source directory will be immediately visible to the Python interpreter. To install
+ XGBoost as an editable installation, first build the shared library as previously
+ described, then install the Python package:
.. code-block:: bash
diff --git a/doc/conf.py b/doc/conf.py
index ec58c5a5d456..0a90fa297bef 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -21,8 +21,6 @@
import warnings
from urllib.error import HTTPError
-from sh.contrib import git
-
CURR_PATH = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
PROJECT_ROOT = os.path.normpath(os.path.join(CURR_PATH, os.path.pardir))
TMP_DIR = os.path.join(CURR_PATH, "tmp")
@@ -61,6 +59,49 @@ def run_doxygen():
os.chdir(curdir)
+def build_jvm_docs():
+ """Build docs for the JVM packages"""
+ git_branch = os.getenv("READTHEDOCS_VERSION_NAME", default=None)
+ print(f"READTHEDOCS_VERSION_NAME = {git_branch}")
+
+ if not git_branch:
+ git_branch = "master"
+ elif git_branch == "latest":
+ git_branch = "master"
+ elif git_branch == "stable":
+ git_branch = f"release_{version}"
+ print(f"git_branch = {git_branch}")
+
+ def try_fetch_jvm_doc(branch):
+ """
+ Attempt to fetch JVM docs for a given branch.
+ Returns True if successful
+ """
+ try:
+ url = f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{branch}.tar.bz2"
+ filename, _ = urllib.request.urlretrieve(url)
+ if not os.path.exists(TMP_DIR):
+ print(f"Create directory {TMP_DIR}")
+ os.mkdir(TMP_DIR)
+ jvm_doc_dir = os.path.join(TMP_DIR, "jvm_docs")
+ if os.path.exists(jvm_doc_dir):
+ print(f"Delete directory {jvm_doc_dir}")
+ shutil.rmtree(jvm_doc_dir)
+ print(f"Create directory {jvm_doc_dir}")
+ os.mkdir(jvm_doc_dir)
+
+ with tarfile.open(filename, "r:bz2") as t:
+ t.extractall(jvm_doc_dir)
+ return True
+ except HTTPError:
+ print(f"JVM doc not found at {url}. Skipping...")
+ return False
+
+ if not try_fetch_jvm_doc(git_branch):
+ print(f"Falling back to the master branch...")
+ try_fetch_jvm_doc("master")
+
+
def is_readthedocs_build():
if os.environ.get("READTHEDOCS", None) == "True":
return True
@@ -75,40 +116,9 @@ def is_readthedocs_build():
if is_readthedocs_build():
run_doxygen()
+ build_jvm_docs()
-git_branch = os.getenv("SPHINX_GIT_BRANCH", default=None)
-if not git_branch:
- # If SPHINX_GIT_BRANCH environment variable is not given, run git
- # to determine branch name
- git_branch = [
- re.sub(r"origin/", "", x.lstrip(" "))
- for x in str(git.branch("-r", "--contains", "HEAD")).rstrip("\n").split("\n")
- ]
- git_branch = [x for x in git_branch if "HEAD" not in x]
-else:
- git_branch = [git_branch]
-print("git_branch = {}".format(git_branch[0]))
-
-try:
- filename, _ = urllib.request.urlretrieve(
- f"https://s3-us-west-2.amazonaws.com/xgboost-docs/{git_branch[0]}.tar.bz2"
- )
- if not os.path.exists(TMP_DIR):
- print(f"Create directory {TMP_DIR}")
- os.mkdir(TMP_DIR)
- jvm_doc_dir = os.path.join(TMP_DIR, "jvm")
- if os.path.exists(jvm_doc_dir):
- print(f"Delete directory {jvm_doc_dir}")
- shutil.rmtree(jvm_doc_dir)
- print(f"Create directory {jvm_doc_dir}")
- os.mkdir(jvm_doc_dir)
-
- with tarfile.open(filename, "r:bz2") as t:
- t.extractall(jvm_doc_dir)
-except HTTPError:
- print("JVM doc not found. Skipping...")
-
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -152,7 +162,7 @@ def is_readthedocs_build():
"../demo/dask",
"../demo/aft_survival",
"../demo/gpu_acceleration",
- "../demo/rmm_plugin"
+ "../demo/rmm_plugin",
],
# path to where to save gallery generated output
"gallery_dirs": [
@@ -250,7 +260,7 @@ def is_readthedocs_build():
html_theme_options = {"logo_only": True}
-html_logo = "https://xgboost.ai/images/logo/xgboost-logo-ng.png"
+html_logo = "https://xgboost.ai/images/logo/xgboost-logo.png"
html_css_files = ["css/custom.css"]
diff --git a/doc/contrib/featuremap.rst b/doc/contrib/featuremap.rst
new file mode 100644
index 000000000000..66b87129e774
--- /dev/null
+++ b/doc/contrib/featuremap.rst
@@ -0,0 +1,69 @@
+############################
+XGBoost Internal Feature Map
+############################
+
+The following is a reference to the features supported by XGBoost. It is not a beginner's guide, but rather a list meant to help those looking to add new features to XGBoost understand what needs to be covered.
+
+*************
+Core Features
+*************
+Core features do not depend on any particular language binding, and each language binding can choose to support them.
+
+-------------
+Data Storage
+-------------
+The primary data structure in XGBoost for storing user inputs is ``DMatrix``; it's a container for all data that XGBoost can use. ``QuantileDMatrix`` is a variant specifically designed for the ``hist`` tree method. Both can take GPU-based inputs. They take an optional parameter ``missing`` to specify which input value should be ignored. For external memory support, please refer to :doc:`/tutorials/external_memory`.
+
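A minimal Python sketch of the two containers, for illustration only (toy data, not part of this document):

```python
import numpy as np
import xgboost as xgb

X = np.array([[1.0, -999.0], [2.0, 3.0]])
y = np.array([0.0, 1.0])
# Values equal to `missing` are treated as absent.
dmat = xgb.DMatrix(X, label=y, missing=-999.0)           # general-purpose container
qdmat = xgb.QuantileDMatrix(X, label=y, missing=-999.0)  # for the hist tree method
```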
+---------------------
+Single Node Training
+---------------------
+There are two different model types in XGBoost: the tree model, which we primarily focus on, and the linear model. For the tree model, we have various methods to build decision trees; please see :doc:`/treemethod` for a complete reference. In addition to the tree method, we have many hyper-parameters for tuning the model and injecting prior knowledge into the training process. Two noteworthy examples are :doc:`monotonic constraints </tutorials/monotonic>` and :doc:`feature interaction constraints </tutorials/feature_interaction_constraint>`. These two constraints require special treatment during tree construction. Both the ``hist`` and the ``approx`` tree methods support GPU acceleration. Also, XGBoost on GPU supports gradient-based sampling, which works with external-memory data as well.
+
+The objective function plays an important role in training. It not only provides the gradient, but is also responsible for estimating a good starting point for Newton optimization. Please note that users can define custom objective functions for the task at hand.
+In addition to numerical features, XGBoost also supports categorical features with two different algorithms: one-hot encoding and optimal partitioning. For more information, refer to the :doc:`categorical feature tutorial </tutorials/categorical>`. Both the ``hist`` and the ``approx`` tree methods support categorical features on CPU and GPU.
+
+There is work-in-progress support for vector leaves, which are decision tree leaves that contain multiple values. This type of tree is used to support efficient multi-class and multi-target models.
+
+----------
+Inference
+----------
+By inference, we specifically mean obtaining model predictions for the response variable. XGBoost supports two inference methods. The first one is prediction on the ``DMatrix`` object (or ``QuantileDMatrix``, which is a subclass). Using a ``DMatrix`` object allows XGBoost to cache the prediction, which yields faster performance when running prediction on the same data with new trees. The second method is ``inplace_predict``, which bypasses the construction of ``DMatrix``. It's more efficient but doesn't support cached prediction. In addition to returning the estimated response, we also support returning the leaf index, which can be used to analyze the model and as a feature for another model.
+
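A sketch of the two prediction paths just described, reusing `X` and `dmat` from the previous snippet; illustrative only:

```python
booster = xgb.train({"tree_method": "hist"}, dmat, num_boost_round=4)
pred_cached = booster.predict(dmat)                  # DMatrix path, results can be cached
pred_inplace = booster.inplace_predict(X)            # bypasses DMatrix construction
leaf_index = booster.predict(dmat, pred_leaf=True)   # leaf indices for model analysis
```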
+----------
+Model IO
+----------
+We have a set of interfaces for model serialization, including complete serialization, saving to a file, and saving to a buffer. For more, refer to :doc:`/tutorials/saving_model`.
+
+-------------------
+Model Explanation
+-------------------
+XGBoost includes features designed to improve understanding of the model. Here's a list:
+
+- Global feature importance.
+- SHAP value, including contribution and intervention.
+- Tree dump.
+- Tree visualization.
+- Tree as dataframe.
+
+For GPU support, the SHAP value uses the `GPUTreeShap <https://github.com/rapidsai/gputreeshap>`_ project from RAPIDS. All of the above support categorical features, while vector-leaf support is still in progress.
+
+----------
+Evaluation
+----------
+XGBoost has built-in support for a wide range of metrics, from basic regression to learning to rank and survival modeling. All of them work with distributed training and GPU-based acceleration. Custom metrics are supported as well; please see :doc:`/tutorials/custom_metric_obj`.
+
+--------------------
+Distributed Training
+--------------------
+XGBoost has built-in support for three distributed frameworks: ``Dask``, ``PySpark``, and ``Spark (Scala)``. In addition, there's ``flink`` support for the Java binding and the ``ray-xgboost`` project. Please see the respective tutorials on how to use them. By default, XGBoost uses sample-based parallelism for distributed training. The column-based split is still a work in progress and needs to be supported in these high-level framework integrations. On top of distributed training, we are also working on federated learning for both sample-based and column-based splits.
+
+Distributed training works with custom objective functions and metrics as well. XGBoost aggregates the evaluation result automatically during training.
+
+The distributed training is enabled by a built-in implementation of a collective library. It's based on the RABIT project and has evolved significantly since its early adoption. The collective implementation supports GPU via NCCL, and has variants for federated learning on both CPU and GPU.
+
+Inference normally doesn't require any special treatment since we are using sample-based split. However, with column-based data split, we need to initialize the communicator context as well.
+
+*****************
+Language Bindings
+*****************
+We have a list of bindings for various languages. Inside the XGBoost repository, there are Python, R, Java, Scala, and C bindings. All language bindings are built on top of the C version. Some others, like Julia and Rust, have their own repositories. For guidelines on adding a new binding, please see :doc:`/contrib/consistency`.
\ No newline at end of file
diff --git a/doc/contrib/index.rst b/doc/contrib/index.rst
index feac865fbe34..75bd37094e89 100644
--- a/doc/contrib/index.rst
+++ b/doc/contrib/index.rst
@@ -27,6 +27,7 @@ Here are guidelines for contributing to various aspect of the XGBoost project:
python_packaging
unit_tests
Docs and Examples
+ featuremap
git_guide
release
ci
diff --git a/doc/faq.rst b/doc/faq.rst
index 4fe63076c18b..cdfb8bc2cb3c 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
and use less resources to complete the task (thus with less communication and chance of failure).
To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
-The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
+The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
Such design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
Most importantly, it pushes the limit of the computation resources we can use.
diff --git a/doc/jvm/api.rst b/doc/jvm/api.rst
new file mode 100644
index 000000000000..b9e7821aa6fa
--- /dev/null
+++ b/doc/jvm/api.rst
@@ -0,0 +1,8 @@
+#############################
+API Docs for the JVM packages
+#############################
+
+* `XGBoost4J Java API <../jvm_docs/javadocs/index.html>`_
+* `XGBoost4J Scala API <../jvm_docs/scaladocs/xgboost4j/index.html>`_
+* `XGBoost4J-Spark Scala API <../jvm_docs/scaladocs/xgboost4j-spark/index.html>`_
+* `XGBoost4J-Flink Scala API <../jvm_docs/scaladocs/xgboost4j-flink/index.html>`_
diff --git a/doc/jvm/index.rst b/doc/jvm/index.rst
index a92834d747e0..0a2e947ea586 100644
--- a/doc/jvm/index.rst
+++ b/doc/jvm/index.rst
@@ -37,10 +37,7 @@ Contents
XGBoost4J-Spark Tutorial
XGBoost4J-Spark-GPU Tutorial
Code Examples
- XGBoost4J Java API
- XGBoost4J Scala API
- XGBoost4J-Spark Scala API
- XGBoost4J-Flink Scala API
+ API docs
.. note::
diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h
index 4b60fe01a546..85897412f9a6 100644
--- a/include/xgboost/c_api.h
+++ b/include/xgboost/c_api.h
@@ -114,7 +114,7 @@ XGB_DLL int XGBGetGlobalConfig(char const **out_config);
/**
* @defgroup DMatrix DMatrix
*
- * @brief DMatrix is the baisc data storage for XGBoost used by all XGBoost algorithms
+ * @brief DMatrix is the basic data storage for XGBoost used by all XGBoost algorithms
* including both training, prediction and explanation. There are a few variants of
* `DMatrix` including normal `DMatrix`, which is a CSR matrix, `QuantileDMatrix`,
* which is used by histogram-based tree methods for saving memory, and lastly the
@@ -1265,13 +1265,11 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
* \param handle handle
* \param config JSON encoded string storing parameters for the function. Following
* keys are expected in the JSON document:
- *
- * "format": str
- * - json: Output booster will be encoded as JSON.
- * - ubj: Output booster will be encoded as Univeral binary JSON.
- * - deprecated: Output booster will be encoded as old custom binary format. Do not use
- * this format except for compatibility reasons.
- *
+ * - "format": str
+ * - json: Output booster will be encoded as JSON.
+ * - ubj: Output booster will be encoded as Universal binary JSON.
+ * - deprecated: Output booster will be encoded as the old custom binary format. Do not use
+ * this format except for compatibility reasons.
* \param out_len The argument to hold the output length
* \param out_dptr The argument to hold the output data pointer
*
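
On the JVM the same "format" switch surfaces when serializing a booster. A hedged sketch, assuming the Booster#toByteArray(String) overload available in recent xgboost4j releases:

    import java.io.ByteArrayInputStream;
    import java.io.IOException;

    import ml.dmlc.xgboost4j.java.Booster;
    import ml.dmlc.xgboost4j.java.XGBoost;
    import ml.dmlc.xgboost4j.java.XGBoostError;

    // Round-trip a trained booster through the UBJSON encoding;
    // "json" and "deprecated" select the other two formats.
    static Booster roundTrip(Booster booster) throws XGBoostError, IOException {
      byte[] raw = booster.toByteArray("ubj");
      return XGBoost.loadModel(new ByteArrayInputStream(raw));
    }
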
diff --git a/rabit/include/rabit/internal/socket.h b/include/xgboost/collective/poll_utils.h
similarity index 97%
rename from rabit/include/rabit/internal/socket.h
rename to include/xgboost/collective/poll_utils.h
index 3701146d4577..514e0a5c6633 100644
--- a/rabit/include/rabit/internal/socket.h
+++ b/include/xgboost/collective/poll_utils.h
@@ -3,8 +3,7 @@
* \file socket.h
* \author Tianqi Chen
*/
-#ifndef RABIT_INTERNAL_SOCKET_H_
-#define RABIT_INTERNAL_SOCKET_H_
+#pragma once
#include "xgboost/collective/result.h"
#include "xgboost/collective/socket.h"
@@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
#pragma message("Distributed training on mingw is not supported.")
typedef struct pollfd {
SOCKET fd;
- short events;
- short revents;
+ short events; // NOLINT
+ short revents; // NOLINT
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;
// POLLRDNORM | POLLRDBAND
@@ -97,7 +96,8 @@ std::enable_if_t, xgboost::collective::Result> PollError(E
if ((revents & POLLERR) != 0) {
auto err = errno;
auto str = strerror(err);
- return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
+ return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
+ std::string{str} + // NOLINT
" code:" + std::to_string(err));
}
if ((revents & POLLNVAL) != 0) {
@@ -229,5 +229,3 @@ struct PollHelper {
#undef POLLPRI
#undef POLLOUT
#endif // IS_MINGW()
-
-#endif // RABIT_INTERNAL_SOCKET_H_
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index 05e2cb0080f0..7ae1c4ebcc09 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -473,10 +473,7 @@ class BatchIterator {
return *(*impl_);
}
- bool operator!=(const BatchIterator&) const {
- CHECK(impl_ != nullptr);
- return !impl_->AtEnd();
- }
+ [[nodiscard]] bool operator!=(const BatchIterator&) const { return !this->AtEnd(); }
[[nodiscard]] bool AtEnd() const {
CHECK(impl_ != nullptr);
@@ -511,13 +508,13 @@ class DMatrix {
public:
/*! \brief default constructor */
DMatrix() = default;
- /*! \brief meta information of the dataset */
- virtual MetaInfo& Info() = 0;
+ /** @brief meta information of the dataset */
+ [[nodiscard]] virtual MetaInfo& Info() = 0;
virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str});
}
- /*! \brief meta information of the dataset */
+ /** @brief meta information of the dataset */
[[nodiscard]] virtual const MetaInfo& Info() const = 0;
/*! \brief Get thread local memory for returning data from DMatrix. */
diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h
index 70e5417af779..b20753b03548 100644
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -5,7 +5,7 @@
#define XGBOOST_VERSION_CONFIG_H_
#define XGBOOST_VER_MAJOR 2 /* NOLINT */
-#define XGBOOST_VER_MINOR 1 /* NOLINT */
+#define XGBOOST_VER_MINOR 2 /* NOLINT */
#define XGBOOST_VER_PATCH 0 /* NOLINT */
#endif // XGBOOST_VERSION_CONFIG_H_
diff --git a/jvm-packages/CMakeLists.txt b/jvm-packages/CMakeLists.txt
index 36ed61a6b063..c6353d4b7400 100644
--- a/jvm-packages/CMakeLists.txt
+++ b/jvm-packages/CMakeLists.txt
@@ -21,7 +21,6 @@ target_include_directories(xgboost4j
${JNI_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
${PROJECT_SOURCE_DIR}/include
- ${PROJECT_SOURCE_DIR}/dmlc-core/include
- ${PROJECT_SOURCE_DIR}/rabit/include)
+ ${PROJECT_SOURCE_DIR}/dmlc-core/include)
set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 17afbe48d2cc..8b26af4f2190 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>
     <description>JVM Package for XGBoost</description>
@@ -35,16 +35,17 @@
         <maven.compiler.target>1.8</maven.compiler.target>
         <flink.version>1.19.0</flink.version>
         <junit.version>4.13.2</junit.version>
-        <spark.version>3.4.1</spark.version>
-        <spark.version.gpu>3.4.1</spark.version.gpu>
+        <spark.version>3.5.1</spark.version>
+        <spark.version.gpu>3.5.1</spark.version.gpu>
+        <fasterxml.jackson.version>2.15.2</fasterxml.jackson.version>
         <scala.version>2.12.18</scala.version>
         <scala.binary.version>2.12</scala.binary.version>
         <hadoop.version>3.4.0</hadoop.version>
         <maven.wagon.http.retryHandler.count>5</maven.wagon.http.retryHandler.count>
         <log.capi.invocation>OFF</log.capi.invocation>
         <use.cuda>OFF</use.cuda>
-        <cudf.version>23.12.1</cudf.version>
-        <spark.rapids.version>23.12.1</spark.rapids.version>
+        <cudf.version>24.04.0</cudf.version>
+        <spark.rapids.version>24.04.1</spark.rapids.version>
         <cudf.classifier>cuda12</cudf.classifier>
         <scalatest.version>3.2.18</scalatest.version>
         <scala-collection-compat.version>2.12.0</scala-collection-compat.version>
@@ -179,7 +180,7 @@
                 <groupId>org.sonatype.plugins</groupId>
                 <artifactId>nexus-staging-maven-plugin</artifactId>
-                <version>1.6.13</version>
+                <version>1.7.0</version>
                 <extensions>true</extensions>
                 <serverId>ossrh</serverId>
@@ -410,7 +411,7 @@
                 <groupId>net.alchim31.maven</groupId>
                 <artifactId>scala-maven-plugin</artifactId>
-                <version>4.9.0</version>
+                <version>4.9.1</version>
                 <goal>compile</goal>
@@ -473,7 +474,7 @@
                 <groupId>net.alchim31.maven</groupId>
                 <artifactId>scala-maven-plugin</artifactId>
-                <version>4.9.0</version>
+                <version>4.9.1</version>
                 <jvmArg>-Xms64m</jvmArg>
@@ -489,11 +490,6 @@
             <artifactId>kryo</artifactId>
             <version>5.6.0</version>
-        <dependency>
-            <groupId>com.fasterxml.jackson.core</groupId>
-            <artifactId>jackson-databind</artifactId>
-            <version>2.14.2</version>
-        </dependency>
             <groupId>commons-logging</groupId>
             <artifactId>commons-logging</artifactId>
diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml
index 431c6766a8be..eda453041fa3 100644
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,11 +6,11 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <name>xgboost4j-example</name>
     <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <packaging>jar</packaging>
diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml
index e3dfb383041f..10ebfe36a6e8 100644
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,12 +6,12 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <name>xgboost4j-flink</name>
     <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
         <flink.ml.version>2.2.0</flink.ml.version>
diff --git a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
index 99608b927489..a660bca8806c 100644
--- a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
+++ b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
@@ -176,7 +176,7 @@ public static XGBoostModel train(DataSet> dtrain,
new RabitTracker(dtrain.getExecutionEnvironment().getParallelism());
if (tracker.start()) {
return dtrain
- .mapPartition(new MapFunction(params, numBoostRound, tracker.workerArgs()))
+ .mapPartition(new MapFunction(params, numBoostRound, tracker.getWorkerArgs()))
.reduce((x, y) -> x)
.collect()
.get(0);
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index 25b44d6b2d2d..bd26acd688cd 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,11 +6,11 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <artifactId>xgboost4j-gpu_2.12</artifactId>
     <name>xgboost4j-gpu</name>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <packaging>jar</packaging>
@@ -72,7 +72,7 @@
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.6.3</version>
+                <version>3.7.0</version>
                 <show>protected</show>
                 <nohelp>true</nohelp>
@@ -88,7 +88,7 @@
                 <artifactId>exec-maven-plugin</artifactId>
                 <groupId>org.codehaus.mojo</groupId>
-                <version>3.2.0</version>
+                <version>3.3.0</version>
                 <id>native</id>
@@ -113,7 +113,7 @@
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-jar-plugin</artifactId>
-                <version>3.4.0</version>
+                <version>3.4.1</version>
diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml
index 149f2f3a326a..c97924105f29 100644
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <name>xgboost4j-spark-gpu</name>
     <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
index 7e83dc6f17b0..00c547aa8758 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2021-2022 by Contributors
+ Copyright (c) 2021-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ import org.apache.spark.{SparkContext, TaskContext}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
-import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
import org.apache.spark.sql.functions.{col, collect_list, struct}
import org.apache.spark.sql.types.{ArrayType, FloatType, StructField, StructType}
@@ -444,7 +444,7 @@ object GpuPreXGBoost extends PreXGBoostProvider {
.groupBy(groupName)
.agg(collect_list(struct(schema.fieldNames.map(col): _*)) as "list")
- implicit val encoder = RowEncoder(schema)
+ implicit val encoder = ExpressionEncoder(RowEncoder.encoderFor(schema, false))
// Expand the grouped rows after repartition
repartitionInputData(groupedDF, nWorkers).mapPartitions(iter => {
new Iterator[Row] {
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala
index c88aefa4eb0a..79a8d5449606 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuUtils.scala
@@ -89,9 +89,13 @@ private[spark] object GpuUtils {
val featureNameSet = featureNames.distinct
validateSchema(dataset.schema, featureNameSet, labelName, weightName, marginName, fitting)
- val castToFloat = (ds: Dataset[_], colName: String) => {
- val colMeta = ds.schema(colName).metadata
- ds.withColumn(colName, col(colName).as(colName, colMeta).cast(FloatType))
+ val castToFloat = (df: DataFrame, colName: String) => {
+ if (df.schema(colName).dataType.isInstanceOf[FloatType]) {
+ df
+ } else {
+ val colMeta = df.schema(colName).metadata
+ df.withColumn(colName, col(colName).as(colName, colMeta).cast(FloatType))
+ }
}
val colNames = if (fitting) {
var names = featureNameSet :+ labelName
diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml
index 6f16335f013d..5412642549d6 100644
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <name>xgboost4j-spark</name>
     <artifactId>xgboost4j-spark_2.12</artifactId>
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
index e17c68355c5b..10c4b5a72992 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
@@ -233,24 +233,6 @@ private[this] class XGBoostExecutionParamsFactory(rawParams: Map[String, Any], s
xgbExecParam.setRawParamMap(overridedParams)
xgbExecParam
}
-
- private[spark] def buildRabitParams : Map[String, String] = Map(
- "rabit_reduce_ring_mincount" ->
- overridedParams.getOrElse("rabit_ring_reduce_threshold", 32 << 10).toString,
- "rabit_debug" ->
- (overridedParams.getOrElse("verbosity", 0).toString.toInt == 3).toString,
- "rabit_timeout" ->
- (overridedParams.getOrElse("rabit_timeout", -1).toString.toInt >= 0).toString,
- "rabit_timeout_sec" -> {
- if (overridedParams.getOrElse("rabit_timeout", -1).toString.toInt >= 0) {
- overridedParams.get("rabit_timeout").toString
- } else {
- "1800"
- }
- },
- "DMLC_WORKER_CONNECT_RETRY" ->
- overridedParams.getOrElse("dmlc_worker_connect_retry", 5).toString
- )
}
/**
@@ -475,17 +457,15 @@ object XGBoost extends XGBoostStageLevel {
}
}
- /** visiable for testing */
- private[scala] def getTracker(nWorkers: Int, trackerConf: TrackerConf): ITracker = {
- val tracker: ITracker = new RabitTracker(
- nWorkers, trackerConf.hostIp, trackerConf.port, trackerConf.timeout)
- tracker
- }
-
- private def startTracker(nWorkers: Int, trackerConf: TrackerConf): ITracker = {
- val tracker = getTracker(nWorkers, trackerConf)
+ // Executes the provided code block inside a tracker and then stops the tracker
+ private def withTracker[T](nWorkers: Int, conf: TrackerConf)(block: ITracker => T): T = {
+ val tracker = new RabitTracker(nWorkers, conf.hostIp, conf.port, conf.timeout)
require(tracker.start(), "FAULT: Failed to start tracker")
- tracker
+ try {
+ block(tracker)
+ } finally {
+ tracker.stop()
+ }
}
/**
@@ -501,28 +481,27 @@ object XGBoost extends XGBoostStageLevel {
logger.info(s"Running XGBoost ${spark.VERSION} with parameters:\n${params.mkString("\n")}")
val xgbParamsFactory = new XGBoostExecutionParamsFactory(params, sc)
- val xgbExecParams = xgbParamsFactory.buildXGBRuntimeParams
- val xgbRabitParams = xgbParamsFactory.buildRabitParams.asJava
+ val runtimeParams = xgbParamsFactory.buildXGBRuntimeParams
- val prevBooster = xgbExecParams.checkpointParam.map { checkpointParam =>
+ val prevBooster = runtimeParams.checkpointParam.map { checkpointParam =>
val checkpointManager = new ExternalCheckpointManager(
checkpointParam.checkpointPath,
FileSystem.get(sc.hadoopConfiguration))
- checkpointManager.cleanUpHigherVersions(xgbExecParams.numRounds)
+ checkpointManager.cleanUpHigherVersions(runtimeParams.numRounds)
checkpointManager.loadCheckpointAsScalaBooster()
}.orNull
// Get the training data RDD and the cachedRDD
- val (trainingRDD, optionalCachedRDD) = buildTrainingData(xgbExecParams)
+ val (trainingRDD, optionalCachedRDD) = buildTrainingData(runtimeParams)
try {
- // Train for every ${savingRound} rounds and save the partially completed booster
- val tracker = startTracker(xgbExecParams.numWorkers, xgbExecParams.trackerConf)
- val (booster, metrics) = try {
- tracker.workerArgs().putAll(xgbRabitParams)
- val rabitEnv = tracker.workerArgs
+ val (booster, metrics) = withTracker(
+ runtimeParams.numWorkers,
+ runtimeParams.trackerConf
+ ) { tracker =>
+ val rabitEnv = tracker.getWorkerArgs()
- val boostersAndMetrics = trainingRDD.barrier().mapPartitions { iter => {
+ val boostersAndMetrics = trainingRDD.barrier().mapPartitions { iter =>
var optionWatches: Option[() => Watches] = None
// take the first Watches to train
@@ -530,26 +509,25 @@ object XGBoost extends XGBoostStageLevel {
optionWatches = Some(iter.next())
}
- optionWatches.map { buildWatches => buildDistributedBooster(buildWatches,
- xgbExecParams, rabitEnv, xgbExecParams.obj, xgbExecParams.eval, prevBooster)}
- .getOrElse(throw new RuntimeException("No Watches to train"))
-
- }}
+ optionWatches.map { buildWatches =>
+ buildDistributedBooster(buildWatches,
+ runtimeParams, rabitEnv, runtimeParams.obj, runtimeParams.eval, prevBooster)
+ }.getOrElse(throw new RuntimeException("No Watches to train"))
+ }
- val boostersAndMetricsWithRes = tryStageLevelScheduling(sc, xgbExecParams,
+ val boostersAndMetricsWithRes = tryStageLevelScheduling(sc, runtimeParams,
boostersAndMetrics)
// The repartition step is to make training stage as ShuffleMapStage, so that when one
// of the training task fails the training stage can retry. ResultStage won't retry when
// it fails.
val (booster, metrics) = boostersAndMetricsWithRes.repartition(1).collect()(0)
(booster, metrics)
- } finally {
- tracker.stop()
}
+
// we should delete the checkpoint directory after a successful training
- xgbExecParams.checkpointParam.foreach {
+ runtimeParams.checkpointParam.foreach {
cpParam =>
- if (!xgbExecParams.checkpointParam.get.skipCleanCheckpoint) {
+ if (!runtimeParams.checkpointParam.get.skipCleanCheckpoint) {
val checkpointManager = new ExternalCheckpointManager(
cpParam.checkpointPath,
FileSystem.get(sc.hadoopConfiguration))
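
The withTracker refactor above is the classic loan pattern: the tracker's lifetime brackets the code that borrows it, and stop() runs even when training throws. A minimal Java analogue (a hypothetical helper; it assumes ITracker exposes stop() as the Scala call sites imply):

    import java.util.function.Function;

    import ml.dmlc.xgboost4j.java.ITracker;
    import ml.dmlc.xgboost4j.java.RabitTracker;
    import ml.dmlc.xgboost4j.java.XGBoostError;

    // Run `block` with a started tracker and always stop the tracker afterwards.
    static <T> T withTracker(int nWorkers, Function<ITracker, T> block) throws XGBoostError {
      ITracker tracker = new RabitTracker(nWorkers);
      if (!tracker.start()) {
        throw new XGBoostError("FAULT: Failed to start tracker");
      }
      try {
        return block.apply(tracker);
      } finally {
        tracker.stop();
      }
    }
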
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
index 108053af5d76..d3f3901ad704 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
@@ -45,7 +45,7 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
val tracker = new RabitTracker(numWorkers)
tracker.start()
- val trackerEnvs = tracker. workerArgs
+ val trackerEnvs = tracker.getWorkerArgs
val workerCount: Int = numWorkers
/*
@@ -84,7 +84,7 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache()
val tracker = new RabitTracker(numWorkers)
tracker.start()
- val trackerEnvs = tracker.workerArgs
+ val trackerEnvs = tracker.getWorkerArgs
val workerCount: Int = numWorkers
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index 5a83a400c50b..5c5648b6d23a 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,11 +6,11 @@
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
     <name>xgboost4j</name>
     <artifactId>xgboost4j_2.12</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <packaging>jar</packaging>
@@ -53,6 +53,12 @@
             <version>${scalatest.version}</version>
             <scope>provided</scope>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>${fasterxml.jackson.version}</version>
+            <scope>provided</scope>
+        </dependency>
@@ -60,7 +66,7 @@
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-javadoc-plugin</artifactId>
-                <version>3.6.3</version>
+                <version>3.7.0</version>
                 <show>protected</show>
                 <nohelp>true</nohelp>
@@ -76,7 +82,7 @@
                 <artifactId>exec-maven-plugin</artifactId>
                 <groupId>org.codehaus.mojo</groupId>
-                <version>3.2.0</version>
+                <version>3.3.0</version>
                 <id>native</id>
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
index 1bfef677d45c..84e535a269e2 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
@@ -7,7 +7,7 @@
*
* - start(timeout): Start the tracker awaiting for worker connections, with a given
* timeout value (in seconds).
- * - workerArgs(): Return the arguments needed to initialize Rabit clients.
+ * - getWorkerArgs(): Return the arguments needed to initialize Rabit clients.
* - waitFor(timeout): Wait for the task execution by the worker nodes for at most `timeout`
* milliseconds.
*
@@ -21,21 +21,8 @@
* brokers connections between workers.
*/
public interface ITracker extends Thread.UncaughtExceptionHandler {
- enum TrackerStatus {
- SUCCESS(0), INTERRUPTED(1), TIMEOUT(2), FAILURE(3);
- private int statusCode;
-
- TrackerStatus(int statusCode) {
- this.statusCode = statusCode;
- }
-
- public int getStatusCode() {
- return this.statusCode;
- }
- }
-
-  Map<String, Object> workerArgs() throws XGBoostError;
+  Map<String, Object> getWorkerArgs() throws XGBoostError;
boolean start() throws XGBoostError;
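
Putting the interface together: the driver starts the tracker and hands getWorkerArgs() to every worker, which uses them to join the collective. A hedged sketch (Communicator.init/shutdown follow the xgboost4j wrapper naming and should be treated as assumptions):

    import java.util.Map;

    import ml.dmlc.xgboost4j.java.Communicator;
    import ml.dmlc.xgboost4j.java.ITracker;
    import ml.dmlc.xgboost4j.java.RabitTracker;
    import ml.dmlc.xgboost4j.java.XGBoostError;

    // Driver process: start the tracker, then ship its args to the workers.
    static Map<String, Object> startDriver(int nWorkers) throws XGBoostError {
      ITracker tracker = new RabitTracker(nWorkers);
      if (!tracker.start()) {
        throw new XGBoostError("Failed to start tracker");
      }
      return tracker.getWorkerArgs(); // later: tracker.waitFor(timeoutMillis)
    }

    // Each worker process: join the collective, train, then leave.
    static void runWorker(Map<String, Object> workerArgs) throws XGBoostError {
      Communicator.init(workerArgs);
      // ... distributed training happens here ...
      Communicator.shutdown();
    }
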
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
index 914a493cc8d1..48b163a7753b 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
@@ -1,3 +1,19 @@
+/*
+ Copyright (c) 2014-2024 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
package ml.dmlc.xgboost4j.java;
import java.util.Map;
@@ -10,14 +26,12 @@
/**
* Java implementation of the Rabit tracker to coordinate distributed workers.
- *
- * The tracker must be started on driver node before running distributed jobs.
*/
public class RabitTracker implements ITracker {
// Maybe per tracker logger?
private static final Log logger = LogFactory.getLog(RabitTracker.class);
private long handle = 0;
- private Thread tracker_daemon;
+ private Thread trackerDaemon;
public RabitTracker(int numWorkers) throws XGBoostError {
this(numWorkers, "");
@@ -44,7 +58,7 @@ public void uncaughtException(Thread t, Throwable e) {
} catch (InterruptedException ex) {
logger.error(ex);
} finally {
- this.tracker_daemon.interrupt();
+ this.trackerDaemon.interrupt();
}
}
@@ -52,16 +66,14 @@ public void uncaughtException(Thread t, Throwable e) {
* Get environments that can be used to pass to worker.
* @return The environment settings.
*/
-  public Map<String, Object> workerArgs() throws XGBoostError {
+  public Map<String, Object> getWorkerArgs() throws XGBoostError {
// fixme: timeout
String[] args = new String[1];
XGBoostJNI.checkCall(XGBoostJNI.TrackerWorkerArgs(this.handle, 0, args));
ObjectMapper mapper = new ObjectMapper();
- TypeReference