Skip to content

Commit

Permalink
[sycl] add loss guided hist building (#10251)
Browse files Browse the repository at this point in the history
Co-authored-by: Dmitry Razdoburdin <>
  • Loading branch information
razdoburdin committed May 10, 2024
1 parent 9b46505 commit f588252
Show file tree
Hide file tree
Showing 7 changed files with 459 additions and 30 deletions.
87 changes: 87 additions & 0 deletions plugin/sycl/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,93 @@ ::sycl::event SubtractionHist(::sycl::queue qu,
const GHistRow<GradientSumT, MemoryType::on_device>& src2,
size_t size, ::sycl::event event_priv);

/*!
* \brief Histograms of gradient statistics for multiple nodes
*/
template<typename GradientSumT, MemoryType memory_type = MemoryType::shared>
class HistCollection {
public:
using GHistRowT = GHistRow<GradientSumT, memory_type>;

// Access histogram for i-th node
GHistRowT& operator[](bst_uint nid) {
return *(data_.at(nid));
}

const GHistRowT& operator[](bst_uint nid) const {
return *(data_.at(nid));
}

// Initialize histogram collection
void Init(::sycl::queue qu, uint32_t nbins) {
qu_ = qu;
if (nbins_ != nbins) {
nbins_ = nbins;
data_.clear();
}
}

// Create an empty histogram for i-th node
::sycl::event AddHistRow(bst_uint nid) {
::sycl::event event;
if (data_.count(nid) == 0) {
data_[nid] =
std::make_shared<GHistRowT>(&qu_, nbins_,
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
&event);
} else {
data_[nid]->Resize(&qu_, nbins_,
xgboost::detail::GradientPairInternal<GradientSumT>(0, 0),
&event);
}
return event;
}

private:
/*! \brief Number of all bins over all features */
uint32_t nbins_ = 0;

std::unordered_map<uint32_t, std::shared_ptr<GHistRowT>> data_;

::sycl::queue qu_;
};

/*!
* \brief Stores temporary histograms to compute them in parallel
*/
template<typename GradientSumT>
class ParallelGHistBuilder {
public:
using GHistRowT = GHistRow<GradientSumT, MemoryType::on_device>;

void Init(::sycl::queue qu, size_t nbins) {
qu_ = qu;
if (nbins != nbins_) {
hist_buffer_.Init(qu_, nbins);
nbins_ = nbins;
}
}

void Reset(size_t nblocks) {
hist_device_buffer_.Resize(&qu_, nblocks * nbins_ * 2);
}

GHistRowT& GetDeviceBuffer() {
return hist_device_buffer_;
}

protected:
/*! \brief Number of bins in each histogram */
size_t nbins_ = 0;
/*! \brief Buffers for histograms for all nodes processed */
HistCollection<GradientSumT> hist_buffer_;

/*! \brief Buffer for additional histograms for Parallel processing */
GHistRowT hist_device_buffer_;

::sycl::queue qu_;
};

/*!
* \brief Builder for histograms of gradient statistics
*/
Expand Down
6 changes: 6 additions & 0 deletions plugin/sycl/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ class USMVector {
qu->fill(data_.get(), v, size_).wait();
}

USMVector(::sycl::queue* qu, size_t size, T v,
::sycl::event* event) : size_(size), capacity_(size) {
data_ = allocate_memory_(qu, size_);
*event = qu->fill(data_.get(), v, size_, *event);
}

USMVector(::sycl::queue* qu, const std::vector<T> &vec) {
size_ = vec.size();
capacity_ = size_;
Expand Down
46 changes: 46 additions & 0 deletions plugin/sycl/tree/hist_row_adder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*!
* Copyright 2017-2024 by Contributors
* \file hist_row_adder.h
*/
#ifndef PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
#define PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_

#include <vector>
#include <algorithm>

namespace xgboost {
namespace sycl {
namespace tree {

template <typename GradientSumT>
class HistRowsAdder {
public:
virtual void AddHistRows(HistUpdater<GradientSumT>* builder,
std::vector<int>* sync_ids, RegTree *p_tree) = 0;
virtual ~HistRowsAdder() = default;
};

template <typename GradientSumT>
class BatchHistRowsAdder: public HistRowsAdder<GradientSumT> {
public:
void AddHistRows(HistUpdater<GradientSumT>* builder,
std::vector<int>* sync_ids, RegTree *p_tree) override {
builder->builder_monitor_.Start("AddHistRows");

for (auto const& entry : builder->nodes_for_explicit_hist_build_) {
int nid = entry.nid;
auto event = builder->hist_.AddHistRow(nid);
}
for (auto const& node : builder->nodes_for_subtraction_trick_) {
auto event = builder->hist_.AddHistRow(node.nid);
}

builder->builder_monitor_.Stop("AddHistRows");
}
};

} // namespace tree
} // namespace sycl
} // namespace xgboost

#endif // PLUGIN_SYCL_TREE_HIST_ROW_ADDER_H_
68 changes: 68 additions & 0 deletions plugin/sycl/tree/hist_synchronizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*!
* Copyright 2017-2024 by Contributors
* \file hist_synchronizer.h
*/
#ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
#define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_

#include <vector>

#include "../common/hist_util.h"
#include "expand_entry.h"

namespace xgboost {
namespace sycl {
namespace tree {

template <typename GradientSumT>
class HistUpdater;

template <typename GradientSumT>
class HistSynchronizer {
public:
virtual void SyncHistograms(HistUpdater<GradientSumT>* builder,
const std::vector<int>& sync_ids,
RegTree *p_tree) = 0;
virtual ~HistSynchronizer() = default;
};

template <typename GradientSumT>
class BatchHistSynchronizer: public HistSynchronizer<GradientSumT> {
public:
void SyncHistograms(HistUpdater<GradientSumT>* builder,
const std::vector<int>& sync_ids,
RegTree *p_tree) override {
builder->builder_monitor_.Start("SyncHistograms");
const size_t nbins = builder->hist_builder_.GetNumBins();

hist_sync_events_.resize(builder->nodes_for_explicit_hist_build_.size());
for (int i = 0; i < builder->nodes_for_explicit_hist_build_.size(); i++) {
const auto entry = builder->nodes_for_explicit_hist_build_[i];
auto& this_hist = builder->hist_[entry.nid];

if (!(*p_tree)[entry.nid].IsRoot()) {
const size_t parent_id = (*p_tree)[entry.nid].Parent();
auto& parent_hist = builder->hist_[parent_id];
auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)];
hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist,
this_hist, nbins, ::sycl::event());
}
}
builder->qu_.wait_and_throw();

builder->builder_monitor_.Stop("SyncHistograms");
}

std::vector<::sycl::event> GetEvents() const {
return hist_sync_events_;
}

private:
std::vector<::sycl::event> hist_sync_events_;
};

} // namespace tree
} // namespace sycl
} // namespace xgboost

#endif // PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_
93 changes: 93 additions & 0 deletions plugin/sycl/tree/hist_updater.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,69 @@

#include <oneapi/dpl/random>

#include "../common/hist_util.h"

namespace xgboost {
namespace sycl {
namespace tree {

template <typename GradientSumT>
void HistUpdater<GradientSumT>::SetHistSynchronizer(
HistSynchronizer<GradientSumT> *sync) {
hist_synchronizer_.reset(sync);
}

template <typename GradientSumT>
void HistUpdater<GradientSumT>::SetHistRowsAdder(
HistRowsAdder<GradientSumT> *adder) {
hist_rows_adder_.reset(adder);
}

template <typename GradientSumT>
void HistUpdater<GradientSumT>::BuildHistogramsLossGuide(
ExpandEntry entry,
const common::GHistIndexMatrix &gmat,
RegTree *p_tree,
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
nodes_for_explicit_hist_build_.clear();
nodes_for_subtraction_trick_.clear();
nodes_for_explicit_hist_build_.push_back(entry);

if (!(*p_tree)[entry.nid].IsRoot()) {
auto sibling_id = entry.GetSiblingId(p_tree);
nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id));
}

std::vector<int> sync_ids;
hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree);
qu_.wait_and_throw();
BuildLocalHistograms(gmat, p_tree, gpair_device);
hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree);
}

template<typename GradientSumT>
void HistUpdater<GradientSumT>::BuildLocalHistograms(
const common::GHistIndexMatrix &gmat,
RegTree *p_tree,
const USMVector<GradientPair, MemoryType::on_device> &gpair_device) {
builder_monitor_.Start("BuildLocalHistograms");
const size_t n_nodes = nodes_for_explicit_hist_build_.size();
::sycl::event event;

for (size_t i = 0; i < n_nodes; i++) {
const int32_t nid = nodes_for_explicit_hist_build_[i].nid;

if (row_set_collection_[nid].Size() > 0) {
event = BuildHist(gpair_device, row_set_collection_[nid], gmat, &(hist_[nid]),
&(hist_buffer_.GetDeviceBuffer()), event);
} else {
common::InitHist(qu_, &(hist_[nid]), hist_[nid].Size(), &event);
}
}
qu_.wait_and_throw();
builder_monitor_.Stop("BuildLocalHistograms");
}

template<typename GradientSumT>
void HistUpdater<GradientSumT>::InitSampling(
const USMVector<GradientPair, MemoryType::on_device> &gpair,
Expand Down Expand Up @@ -70,6 +129,21 @@ void HistUpdater<GradientSumT>::InitData(
// initialize the row set
{
row_set_collection_.Clear();

// initialize histogram collection
uint32_t nbins = gmat.cut.Ptrs().back();
hist_.Init(qu_, nbins);

hist_buffer_.Init(qu_, nbins);
size_t buffer_size = kBufferSize;
if (buffer_size > info.num_row_ / kMinBlockSize + 1) {
buffer_size = info.num_row_ / kMinBlockSize + 1;
}
hist_buffer_.Reset(buffer_size);

// initialize histogram builder
hist_builder_ = common::GHistBuilder<GradientSumT>(qu_, nbins);

USMVector<size_t, MemoryType::on_device>* row_indices = &(row_set_collection_.Data());
row_indices->Resize(&qu_, info.num_row_);
size_t* p_row_indices = row_indices->Data();
Expand Down Expand Up @@ -122,6 +196,25 @@ void HistUpdater<GradientSumT>::InitData(
}
}
row_set_collection_.Init();

{
/* determine layout of data */
const size_t nrow = info.num_row_;
const size_t ncol = info.num_col_;
const size_t nnz = info.num_nonzero_;
// number of discrete bins for feature 0
const uint32_t nbins_f0 = gmat.cut.Ptrs()[1] - gmat.cut.Ptrs()[0];
if (nrow * ncol == nnz) {
// dense data with zero-based indexing
data_layout_ = kDenseDataZeroBased;
} else if (nbins_f0 == 0 && nrow * (ncol - 1) == nnz) {
// dense data with one-based indexing
data_layout_ = kDenseDataOneBased;
} else {
// sparse data
data_layout_ = kSparseData;
}
}
}

template class HistUpdater<float>;
Expand Down
Loading

0 comments on commit f588252

Please sign in to comment.