Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup set info. #10139

Merged
merged 19 commits into from
Mar 26, 2024
Merged
8 changes: 4 additions & 4 deletions .github/workflows/r_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ jobs:
name: Test R package on Debian
runs-on: ubuntu-latest
container:
image: rhub/debian-gcc-devel
image: rhub/debian-gcc-release

steps:
- name: Install system dependencies
Expand All @@ -130,12 +130,12 @@ jobs:
- name: Install dependencies
shell: bash -l {0}
run: |
/tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"

- name: Test R
shell: bash -l {0}
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check

- uses: dorny/paths-filter@v2
id: changes
Expand All @@ -147,4 +147,4 @@ jobs:
- name: Run document check
if: steps.changes.outputs.r_package == 'true'
run: |
python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc
python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc
60 changes: 12 additions & 48 deletions include/xgboost/c_api.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2015~2023 by XGBoost Contributors
* Copyright 2015-2024, XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
Expand Down Expand Up @@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
const char *field,
const float *array,
XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,
bst_ulong len);
/*!
* \brief set uint32 vector to a content in info
* \param handle a instance of data matrix
* \param field field name
* \param array pointer to unsigned int vector
* \param len length of array
* \return 0 when success, -1 when failure happens
/**
* @deprecated since 2.1.0
*
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
const char *field,
const unsigned *array,
XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,
bst_ulong len);

/*!
Expand Down Expand Up @@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
bst_ulong *size,
const char ***out_features);

/*!
* \brief Set meta info from dense matrix. Valid field names are:
*
* - label
* - weight
* - base_margin
* - group
* - label_lower_bound
* - label_upper_bound
* - feature_weights
/**
* @deprecated since 2.1.0
*
* \param handle An instance of data matrix
* \param field Field name
* \param data Pointer to consecutive memory storing data.
* \param size Size of the data, this is relative to size of type. (Meaning NOT number
* of bytes.)
* \param type Indicator of data type. This is defined in xgboost::DataType enum class.
* - float = 1
* - double = 2
* - uint32_t = 3
* - uint64_t = 4
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
void const *data, bst_ulong size, int type);

/*!
* \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
* \param handle a instance of data matrix
* \param group pointer to group size
* \param len length of array
* \return 0 when success, -1 when failure happens
* Use @ref XGDMatrixSetInfoFromInterface instead.
*/
XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
const unsigned *group,
bst_ulong len);
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
bst_ulong size, int type);

/*!
* \brief get float info vector from matrix.
Expand Down
13 changes: 0 additions & 13 deletions include/xgboost/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <algorithm>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -137,14 +136,6 @@ class MetaInfo {
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
/*!
* \brief Set information in the meta info.
* \param key The key of the information.
* \param dptr The data pointer of the source array.
* \param dtype The type of the source data.
* \param num Number of elements in the source array.
*/
void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num);
/*!
* \brief Set information in the meta info with array interface.
* \param key The key of the information.
Expand Down Expand Up @@ -517,10 +508,6 @@ class DMatrix {
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0;
virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str});
Expand Down
15 changes: 12 additions & 3 deletions include/xgboost/linalg.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,14 @@ constexpr auto ArrToTuple(T (&arr)[N]) {
// Unsigned integer division optimization inspired by the CIndexer in cupy. Division is
// slow on both CPU and GPU, especially for 64-bit integers. So here we first try to avoid
// 64-bit arithmetic when the index is small, then try to avoid division when the dimension size is a power of 2.
template <typename I, int32_t D>
template <typename I, std::int32_t D>
LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
size_t index[D]{0};
std::size_t index[D]{0};
static_assert(std::is_signed<decltype(D)>::value,
"Don't change the type without changing the for loop.");
auto const sptr = shape.data();
for (int32_t dim = D; --dim > 0;) {
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(shape[dim]);
auto s = static_cast<std::remove_const_t<std::remove_reference_t<I>>>(sptr[dim]);
if (s & (s - 1)) {
auto t = idx / s;
index[dim] = idx - t * s;
Expand Down Expand Up @@ -745,6 +746,14 @@ auto ArrayInterfaceStr(TensorView<T, D> const &t) {
return str;
}

/**
 * \brief Build the array-interface string describing a 1-dimensional buffer.
 *
 * \param vec Pointer to the first element of the buffer.
 * \param len Number of elements in the buffer.
 *
 * \return The string produced by ArrayInterfaceStr for a 1-dimensional tensor view
 *         of length \p len over \p vec.
 */
template <typename T>
auto Make1dInterface(T const *vec, std::size_t len) {
  // A default-constructed context is used solely to create the tensor view.
  Context ctx;
  auto view = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
  return linalg::ArrayInterfaceStr(view);
}

/**
* \brief A tensor storage. To use it for other functionality like slicing one needs to
* obtain a view first. This way we can use it on both host and device.
Expand Down
10 changes: 4 additions & 6 deletions include/xgboost/span.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,8 @@
#define XGBOOST_SPAN_H_

#include <xgboost/base.h>
#include <xgboost/logging.h>

#include <cinttypes> // size_t
#include <cstddef> // size_t
#include <cstdio>
#include <iterator>
#include <limits> // numeric_limits
Expand Down Expand Up @@ -73,8 +72,7 @@

#endif // defined(_MSC_VER) && _MSC_VER < 1910

namespace xgboost {
namespace common {
namespace xgboost::common {

#if defined(__CUDA_ARCH__)
// Usual logging facility is not available inside device code.
Expand Down Expand Up @@ -707,8 +705,8 @@ class IterSpan {
return it_ + size();
}
};
} // namespace common
} // namespace xgboost
} // namespace xgboost::common


#if defined(_MSC_VER) &&_MSC_VER < 1910
#undef constexpr
Expand Down
10 changes: 6 additions & 4 deletions jvm-packages/xgboost4j/src/native/xgboost4j.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetFloatI

jfloat* array = jenv->GetFloatArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetFloatInfo(handle, field, (float const *)array, len);
auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, field);
Expand All @@ -427,7 +428,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGDMatrixSetUIntIn
const char* field = jenv->GetStringUTFChars(jfield, 0);
jint* array = jenv->GetIntArrayElements(jarray, NULL);
bst_ulong len = (bst_ulong)jenv->GetArrayLength(jarray);
int ret = XGDMatrixSetUIntInfo(handle, (char const *)field, (unsigned int const *)array, len);
auto str = xgboost::linalg::Make1dInterface(array, len);
int ret = XGDMatrixSetInfoFromInterface(handle, field, str.c_str());
JVM_CHECK_CALL(ret);
//release
if (field) jenv->ReleaseStringUTFChars(jfield, (const char *)field);
Expand Down Expand Up @@ -730,8 +732,8 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredictFr
if (jmargin) {
margin = jenv->GetFloatArrayElements(jmargin, nullptr);
JVM_CHECK_CALL(XGProxyDMatrixCreate(&proxy));
JVM_CHECK_CALL(
XGDMatrixSetFloatInfo(proxy, "base_margin", margin, jenv->GetArrayLength(jmargin)));
auto str = xgboost::linalg::Make1dInterface(margin, jenv->GetArrayLength(jmargin));
JVM_CHECK_CALL(XGDMatrixSetInfoFromInterface(proxy, "base_margin", str.c_str()));
}

bst_ulong const *out_shape;
Expand Down
62 changes: 48 additions & 14 deletions src/c_api/c_api.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2014-2024 by XGBoost Contributors
* Copyright 2014-2024, XGBoost Contributors
*/
#include "xgboost/c_api.h"

Expand Down Expand Up @@ -614,8 +614,8 @@ XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field);
auto const& p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kFloat32, len);
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END();
}

Expand All @@ -634,8 +634,9 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const
API_BEGIN();
CHECK_HANDLE();
xgboost_CHECK_C_ARG_PTR(field);
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo(field, info, xgboost::DataType::kUInt32, len);
p_fmat->SetInfo(field, linalg::Make1dInterface(info, len));
API_END();
}

Expand Down Expand Up @@ -679,19 +680,52 @@ XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void
xgboost::bst_ulong size, int type) {
API_BEGIN();
CHECK_HANDLE();
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGDMatrixSetInfoFromInterface");
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
CHECK(type >= 1 && type <= 4);
xgboost_CHECK_C_ARG_PTR(field);
p_fmat->SetInfo(field, data, static_cast<DataType>(type), size);
API_END();
}

XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle, const unsigned *group, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
LOG(WARNING) << "XGDMatrixSetGroup is deprecated, use `XGDMatrixSetUIntInfo` instead.";
auto const &p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
p_fmat->SetInfo("group", group, xgboost::DataType::kUInt32, len);
Context ctx;
auto dtype = static_cast<DataType>(type);
std::string str;
auto proc = [&](auto cast_d_ptr) {
using T = std::remove_pointer_t<decltype(cast_d_ptr)>;
auto t = linalg::TensorView<T, 1>(
common::Span<T>{cast_d_ptr, static_cast<typename common::Span<T>::index_type>(size)},
{size}, DeviceOrd::CPU());
CHECK(t.CContiguous());
Json interface{linalg::ArrayInterface(t)};
assert(ArrayInterface<1>{interface}.is_contiguous);
trivialfis marked this conversation as resolved.
Show resolved Hide resolved
str = Json::Dump(interface);
return str;
};

// Legacy code using XGBoost dtype, which is a small subset of array interface types.
switch (dtype) {
case xgboost::DataType::kFloat32: {
auto cast_ptr = reinterpret_cast<const float *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kDouble: {
auto cast_ptr = reinterpret_cast<const double *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt32: {
auto cast_ptr = reinterpret_cast<const uint32_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
case xgboost::DataType::kUInt64: {
auto cast_ptr = reinterpret_cast<const uint64_t *>(data);
p_fmat->Info().SetInfo(ctx, field, proc(cast_ptr));
break;
}
default:
LOG(FATAL) << "Unknown data type" << static_cast<uint8_t>(dtype);
}

API_END();
}

Expand Down Expand Up @@ -987,7 +1021,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs
bst_float *hess, xgboost::bst_ulong len) {
API_BEGIN();
CHECK_HANDLE();
error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
LOG(WARNING) << error::DeprecatedFunc(__func__, "2.1.0", "XGBoosterTrainOneIter");
auto *learner = static_cast<Learner *>(handle);
auto ctx = learner->Ctx()->MakeCPU();

Expand Down
19 changes: 10 additions & 9 deletions src/c_api/c_api_utils.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
/**
* Copyright 2021-2023, XGBoost Contributors
* Copyright 2021-2024, XGBoost Contributors
*/
#ifndef XGBOOST_C_API_C_API_UTILS_H_
#define XGBOOST_C_API_C_API_UTILS_H_

#include <algorithm>
#include <cstddef>
#include <functional>
#include <memory> // for shared_ptr
#include <string> // for string
#include <tuple> // for make_tuple
#include <utility> // for move
#include <vector>
#include <algorithm> // for min
#include <cstddef> // for size_t
#include <functional> // for multiplies
#include <memory> // for shared_ptr
#include <numeric> // for accumulate
#include <string> // for string
#include <tuple> // for make_tuple
#include <utility> // for move
#include <vector> // for vector

#include "../common/json_utils.h" // for TypeCheck
#include "xgboost/c_api.h"
Expand Down
2 changes: 2 additions & 0 deletions src/collective/nccl_device_communicator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* Copyright 2023 XGBoost contributors
*/
#if defined(XGBOOST_USE_NCCL)
#include <numeric> // for accumulate

#include "comm.cuh"
#include "nccl_device_communicator.cuh"

Expand Down
2 changes: 1 addition & 1 deletion src/common/error_msg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "xgboost/logging.h"

namespace xgboost::error {
std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {
[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement) {
std::stringstream ss;
ss << "`" << old << "` is deprecated since" << since << ", use `" << replacement << "` instead.";
return ss.str();
Expand Down
2 changes: 1 addition & 1 deletion src/common/error_msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void WarnDeprecatedGPUId();

void WarnEmptyDataset();

std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);
[[nodiscard]] std::string DeprecatedFunc(StringView old, StringView since, StringView replacement);

constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
Expand Down
Loading
Loading