Skip to content

Commit

Permalink
[coll] Move the rabit poll helper. (#10349)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed May 31, 2024
1 parent 0717e88 commit e6eefea
Show file tree
Hide file tree
Showing 18 changed files with 24 additions and 40 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
PRIVATE
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include
)
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
xgboost_target_properties(runxgboost)
Expand Down
2 changes: 0 additions & 2 deletions R-package/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,13 @@ target_compile_definitions(
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1
-DRABIT_STRICT_CXX98_
)

target_include_directories(
xgboost-r PRIVATE
${LIBR_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include
)

target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
Expand Down
1 change: 0 additions & 1 deletion R-package/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

Expand Down
1 change: 0 additions & 1 deletion R-package/src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

Expand Down
1 change: 0 additions & 1 deletion cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
${xgboost_SOURCE_DIR}/rabit/include
${CUDAToolkit_INCLUDE_DIRS})

if(MSVC)
Expand Down
2 changes: 1 addition & 1 deletion demo/c-api/basic/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ TGT=c-api-demo
cc=cc
CFLAGS ?=-O3
XGBOOST_ROOT ?=../..
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib

build: $(TGT)
Expand Down
2 changes: 1 addition & 1 deletion doc/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:

.. note:: Faster distributed GPU training with NCCL

By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.

.. code-block:: bash
Expand Down
2 changes: 1 addition & 1 deletion doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
and use fewer resources to complete the task (thus with less communication and chance of failure).

To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
Such a design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
Most importantly, it pushes the limit of the computation resources we can use.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
* \file socket.h
* \author Tianqi Chen
*/
#ifndef RABIT_INTERNAL_SOCKET_H_
#define RABIT_INTERNAL_SOCKET_H_
#pragma once
#include "xgboost/collective/result.h"
#include "xgboost/collective/socket.h"

Expand Down Expand Up @@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
#pragma message("Distributed training on mingw is not supported.")
typedef struct pollfd {
SOCKET fd;
short events;
short revents;
short events; // NOLINT
short revents; // NOLINT
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;

// POLLRDNORM | POLLRDBAND
Expand Down Expand Up @@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
if ((revents & POLLERR) != 0) {
auto err = errno;
auto str = strerror(err);
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
std::string{str} + // NOLINT
" code:" + std::to_string(err));
}
if ((revents & POLLNVAL) != 0) {
Expand Down Expand Up @@ -229,5 +229,3 @@ struct PollHelper {
#undef POLLPRI
#undef POLLOUT
#endif // IS_MINGW()

#endif // RABIT_INTERNAL_SOCKET_H_
3 changes: 1 addition & 2 deletions jvm-packages/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ target_include_directories(xgboost4j
${JNI_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include)
${PROJECT_SOURCE_DIR}/dmlc-core/include)

set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
1 change: 0 additions & 1 deletion python-package/packager/sdist.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def copy_cpp_src_tree(
"include",
"dmlc-core",
"gputreeshap",
"rabit",
"cmake",
"plugin",
]:
Expand Down
8 changes: 4 additions & 4 deletions src/collective/loop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
#include <thread> // for thread
#include <utility> // for move

#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK

namespace xgboost::collective {
Result Loop::ProcessQueue(std::queue<Op>* p_queue) const {
Expand Down
4 changes: 2 additions & 2 deletions src/collective/socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <system_error> // for error_code, system_category
#include <thread> // for sleep_for

#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result

#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // getaddrinfo, freeaddrinfo
Expand Down
9 changes: 5 additions & 4 deletions src/collective/tracker.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Copyright 2023-2024, XGBoost Contributors
*/
#include "rabit/internal/socket.h"

#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // gethostbyname
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
Expand All @@ -27,9 +27,10 @@
#include "comm.h"
#include "protocol.h" // for kMagic, PeerInfo
#include "tracker.h"
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json

namespace xgboost::collective {

Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/deploy_jvm_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cd jvm-packages
rm -rf $(find . -name target)
rm -rf ../build/

# Re-build package without Mock Rabit
# Re-build package
# Maven profiles:
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`
Expand Down
4 changes: 0 additions & 4 deletions tests/ci_build/test_r_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ def pkgroot(path: str) -> None:
shutil.copytree("src", dest / "src" / "src")
shutil.copytree("include", dest / "src" / "include")
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
# rabit
rabit = Path("rabit")
os.mkdir(dest / "src" / rabit)
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
# dmlc-core
dmlc_core = Path("dmlc-core")
os.mkdir(dest / "src" / dmlc_core)
Expand Down
3 changes: 1 addition & 2 deletions tests/ci_build/tidy.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ def _configure(self):
def should_lint(path):
if not self.cpp_lint and path.endswith('.cc'):
return False
isxgb = path.find('rabit') == -1
isxgb = isxgb and path.find('dmlc-core') == -1
isxgb = path.find('dmlc-core') == -1
isxgb = isxgb and (not path.startswith(self.cdb_path))
if isxgb:
print(path)
Expand Down
6 changes: 2 additions & 4 deletions tests/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
PRIVATE
${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)

target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
Expand Down Expand Up @@ -66,8 +65,7 @@ target_include_directories(testxgboost
PRIVATE
${GTEST_INCLUDE_DIRS}
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)
target_link_libraries(testxgboost
PRIVATE
GTest::gtest GTest::gmock)
Expand Down

0 comments on commit e6eefea

Please sign in to comment.