Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[coll] Move the rabit poll helper. #10349

Merged
merged 3 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ if(BUILD_DEPRECATED_CLI)
PRIVATE
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include
)
set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
xgboost_target_properties(runxgboost)
Expand Down
2 changes: 0 additions & 2 deletions R-package/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,13 @@ target_compile_definitions(
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1
-DRABIT_STRICT_CXX98_
)

target_include_directories(
xgboost-r PRIVATE
${LIBR_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include
)

target_link_libraries(xgboost-r PUBLIC ${LIBR_CORE_LIBRARY})
Expand Down
1 change: 0 additions & 1 deletion R-package/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

Expand Down
1 change: 0 additions & 1 deletion R-package/src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ $(foreach v, $(XGB_RFLAGS), $(warning $(v)))
PKG_CPPFLAGS = \
-I$(PKGROOT)/include \
-I$(PKGROOT)/dmlc-core/include \
-I$(PKGROOT)/rabit/include \
-I$(PKGROOT) \
$(XGB_RFLAGS)

Expand Down
1 change: 0 additions & 1 deletion cmake/Utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ function(xgboost_set_cuda_flags target)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
${xgboost_SOURCE_DIR}/rabit/include
${CUDAToolkit_INCLUDE_DIRS})

if(MSVC)
Expand Down
2 changes: 1 addition & 1 deletion demo/c-api/basic/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ TGT=c-api-demo
cc=cc
CFLAGS ?=-O3
XGBOOST_ROOT ?=../..
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include -I$(XGBOOST_ROOT)/rabit/include
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib

build: $(TGT)
Expand Down
2 changes: 1 addition & 1 deletion doc/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ From the command line on Linux starting from the XGBoost directory:

.. note:: Faster distributed GPU training with NCCL

By default, distributed GPU training is enabled and uses Rabit for communication. For faster training, set the option ``USE_NCCL=ON``. Faster distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **faster distributed GPU training is available only for Linux**.
By default, distributed GPU training is enabled with the option ``USE_NCCL=ON``. Distributed GPU training depends on NCCL2, available at `this link <https://developer.nvidia.com/nccl>`_. Since NCCL2 is only available for Linux machines, **distributed GPU training is available only for Linux**.

.. code-block:: bash

Expand Down
2 changes: 1 addition & 1 deletion doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ The ultimate question will still come back to how to push the limit of each comp
and use fewer resources to complete the task (thus with less communication and a lower chance of failure).

To achieve these, we decide to reuse the optimizations in the single node XGBoost and build the distributed version on top of it.
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs (in our case rabit).
The demand for communication in machine learning is rather simple, in the sense that we can depend on a limited set of APIs.
Such a design allows us to reuse most of the code, while being portable to major platforms such as Hadoop/Yarn, MPI, SGE.
Most importantly, it pushes the limit of the computation resources we can use.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
* \file socket.h
* \author Tianqi Chen
*/
#ifndef RABIT_INTERNAL_SOCKET_H_
#define RABIT_INTERNAL_SOCKET_H_
#pragma once
#include "xgboost/collective/result.h"
#include "xgboost/collective/socket.h"

Expand Down Expand Up @@ -61,8 +60,8 @@ using sock_size_t = size_t; // NOLINT
#pragma message("Distributed training on mingw is not supported.")
typedef struct pollfd {
SOCKET fd;
short events;
short revents;
short events; // NOLINT
short revents; // NOLINT
} WSAPOLLFD, *PWSAPOLLFD, *LPWSAPOLLFD;

// POLLRDNORM | POLLRDBAND
Expand Down Expand Up @@ -97,7 +96,8 @@ std::enable_if_t<std::is_integral_v<E>, xgboost::collective::Result> PollError(E
if ((revents & POLLERR) != 0) {
auto err = errno;
auto str = strerror(err);
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + std::string{str} +
return xgboost::system::FailWithCode(std::string{"Poll error condition:"} + // NOLINT
std::string{str} + // NOLINT
" code:" + std::to_string(err));
}
if ((revents & POLLNVAL) != 0) {
Expand Down Expand Up @@ -229,5 +229,3 @@ struct PollHelper {
#undef POLLPRI
#undef POLLOUT
#endif // IS_MINGW()

#endif // RABIT_INTERNAL_SOCKET_H_
3 changes: 1 addition & 2 deletions jvm-packages/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ target_include_directories(xgboost4j
${JNI_INCLUDE_DIRS}
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/dmlc-core/include
${PROJECT_SOURCE_DIR}/rabit/include)
${PROJECT_SOURCE_DIR}/dmlc-core/include)

set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib)
1 change: 0 additions & 1 deletion python-package/packager/sdist.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def copy_cpp_src_tree(
"include",
"dmlc-core",
"gputreeshap",
"rabit",
"cmake",
"plugin",
]:
Expand Down
8 changes: 4 additions & 4 deletions src/collective/loop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
#include <thread> // for thread
#include <utility> // for move

#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Fail, Success
#include "xgboost/collective/socket.h" // for FailWithCode
#include "xgboost/logging.h" // for CHECK

namespace xgboost::collective {
Result Loop::ProcessQueue(std::queue<Op>* p_queue) const {
Expand Down
4 changes: 2 additions & 2 deletions src/collective/socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <system_error> // for error_code, system_category
#include <thread> // for sleep_for

#include "rabit/internal/socket.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result

#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // getaddrinfo, freeaddrinfo
Expand Down
9 changes: 5 additions & 4 deletions src/collective/tracker.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Copyright 2023-2024, XGBoost Contributors
*/
#include "rabit/internal/socket.h"

#if defined(__unix__) || defined(__APPLE__)
#include <netdb.h> // gethostbyname
#include <sys/socket.h> // socket, AF_INET6, AF_INET, connect, getsockname
Expand All @@ -27,9 +27,10 @@
#include "comm.h"
#include "protocol.h" // for kMagic, PeerInfo
#include "tracker.h"
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json
#include "xgboost/collective/poll_utils.h" // for PollHelper
#include "xgboost/collective/result.h" // for Result, Fail, Success
#include "xgboost/collective/socket.h" // for GetHostName, FailWithCode, MakeSockAddress, ...
#include "xgboost/json.h" // for Json

namespace xgboost::collective {

Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/deploy_jvm_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ cd jvm-packages
rm -rf $(find . -name target)
rm -rf ../build/

# Re-build package without Mock Rabit
# Re-build package
# Maven profiles:
# `default` includes modules: xgboost4j, xgboost4j-spark, xgboost4j-flink, xgboost4j-example
# `gpu` includes modules: xgboost4j-gpu, xgboost4j-spark-gpu, sets `use.cuda = ON`
Expand Down
4 changes: 0 additions & 4 deletions tests/ci_build/test_r_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ def pkgroot(path: str) -> None:
shutil.copytree("src", dest / "src" / "src")
shutil.copytree("include", dest / "src" / "include")
shutil.copytree("amalgamation", dest / "src" / "amalgamation")
# rabit
rabit = Path("rabit")
os.mkdir(dest / "src" / rabit)
shutil.copytree(rabit / "include", dest / "src" / "rabit" / "include")
# dmlc-core
dmlc_core = Path("dmlc-core")
os.mkdir(dest / "src" / dmlc_core)
Expand Down
3 changes: 1 addition & 2 deletions tests/ci_build/tidy.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ def _configure(self):
def should_lint(path):
if not self.cpp_lint and path.endswith('.cc'):
return False
isxgb = path.find('rabit') == -1
isxgb = isxgb and path.find('dmlc-core') == -1
isxgb = path.find('dmlc-core') == -1
isxgb = isxgb and (not path.startswith(self.cdb_path))
if isxgb:
print(path)
Expand Down
6 changes: 2 additions & 4 deletions tests/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ if(PLUGIN_SYCL)
PRIVATE
${gtest_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)

target_compile_definitions(plugin_sycl_test PUBLIC -DXGBOOST_USE_SYCL=1)
target_link_libraries(plugin_sycl_test PUBLIC -fsycl)
Expand Down Expand Up @@ -66,8 +65,7 @@ target_include_directories(testxgboost
PRIVATE
${GTEST_INCLUDE_DIRS}
${xgboost_SOURCE_DIR}/include
${xgboost_SOURCE_DIR}/dmlc-core/include
${xgboost_SOURCE_DIR}/rabit/include)
${xgboost_SOURCE_DIR}/dmlc-core/include)
target_link_libraries(testxgboost
PRIVATE
GTest::gtest GTest::gmock)
Expand Down
Loading