Use view for SparsePage exclusively. (#6590)
trivialfis committed Jan 11, 2021
1 parent 78f2cd8 commit f2f7dd8
Showing 23 changed files with 151 additions and 113 deletions.
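The change repeated across all 23 files follows one pattern: the per-row SparsePage::operator[] below, which re-resolved HostVector() (and with it any pending device-to-host synchronization) on every access, is deleted, and each call site instead takes a HostSparsePageView once per batch via GetView() and indexes that. A minimal before/after sketch of a typical call site (the loop body is illustrative, not taken from the diff):

    // Before: operator[] re-resolved the host vectors on every row access.
    for (size_t i = 0; i < batch.Size(); ++i) {
      auto inst = batch[i];        // SparsePage::operator[], removed below
      // ... consume inst ...
    }

    // After: synchronize to host once, then index a lightweight view.
    auto page = batch.GetView();   // HostSparsePageView over offset/data spans
    for (size_t i = 0; i < batch.Size(); ++i) {
      auto inst = page[i];         // common::Span<Entry const>
      // ... consume inst ...
    }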
9 changes: 0 additions & 9 deletions include/xgboost/data.h
@@ -252,15 +252,6 @@ class SparsePage {
/*! \brief an instance of sparse vector in the batch */
using Inst = common::Span<Entry const>;

/*! \brief get i-th row from the batch */
inline Inst operator[](size_t i) const {
const auto& data_vec = data.HostVector();
const auto& offset_vec = offset.HostVector();
size_t size = offset_vec[i + 1] - offset_vec[i];
return {data_vec.data() + offset_vec[i],
static_cast<Inst::index_type>(size)};
}

HostSparsePageView GetView() const {
return {offset.ConstHostSpan(), data.ConstHostSpan()};
}
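Reading the deleted operator[] together with GetView()'s initializer list ({offset.ConstHostSpan(), data.ConstHostSpan()}) pins down what the view must look like. A sketch of HostSparsePageView under those assumptions (its exact definition is not part of this diff):

    struct HostSparsePageView {
      using Inst = common::Span<Entry const>;

      common::Span<bst_row_t const> offset;  // CSR row offsets
      common::Span<Entry const> data;        // CSR entries

      // Same arithmetic as the deleted SparsePage::operator[], but over
      // spans captured once, so no per-call HostVector() resolution.
      Inst operator[](size_t i) const {
        size_t size = offset[i + 1] - offset[i];
        return {data.data() + offset[i], static_cast<Inst::index_type>(size)};
      }
    };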
3 changes: 2 additions & 1 deletion src/common/hist_util.cc
@@ -78,6 +78,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {
const size_t batch_threads = std::max(
size_t(1),
std::min(batch.Size(), static_cast<size_t>(omp_get_max_threads())));
auto page = batch.GetView();
MemStackAllocator<size_t, 128> partial_sums(batch_threads);
size_t* p_part = partial_sums.Get();

@@ -92,7 +93,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {

size_t sum = 0;
for (size_t i = ibegin; i < iend; ++i) {
sum += batch[i].size();
sum += page[i].size();
row_ptr[rbegin + 1 + i] = sum;
}
}
9 changes: 5 additions & 4 deletions src/data/data.cc
@@ -825,19 +825,20 @@ SparsePage SparsePage::GetTranspose(int num_columns) const {
const int nthread = omp_get_max_threads();
builder.InitBudget(num_columns, nthread);
long batch_size = static_cast<long>(this->Size()); // NOLINT(*)
#pragma omp parallel for default(none) shared(batch_size, builder) schedule(static)
auto page = this->GetView();
#pragma omp parallel for default(none) shared(batch_size, builder, page) schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
auto inst = (*this)[i];
auto inst = page[i];
for (const auto& entry : inst) {
builder.AddBudget(entry.index, tid);
}
}
builder.InitStorage();
#pragma omp parallel for default(none) shared(batch_size, builder) schedule(static)
#pragma omp parallel for default(none) shared(batch_size, builder, page) schedule(static)
for (long i = 0; i < batch_size; ++i) { // NOLINT(*)
int tid = omp_get_thread_num();
auto inst = (*this)[i];
auto inst = page[i];
for (const auto& entry : inst) {
builder.Push(
entry.index,
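One detail worth noting in the two loops above: they run under default(none), which forces every variable used inside the parallel region to be scoped explicitly, so hoisting the view out of the loop also means adding page to each shared(...) clause. A standalone illustration of that rule (not from the codebase):

    #include <vector>

    int main() {
      std::vector<int> page = {1, 2, 3, 4};
      long n = static_cast<long>(page.size());
      long total = 0;
      // Dropping page (or n) from the clauses below is a compile-time
      // error under default(none); reduction counts as explicit scoping.
    #pragma omp parallel for default(none) shared(page, n) reduction(+ : total)
      for (long i = 0; i < n; ++i) {
        total += page[i];
      }
      return static_cast<int>(total);
    }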
5 changes: 2 additions & 3 deletions src/data/simple_dmatrix.cc
@@ -28,13 +28,12 @@ DMatrix* SimpleDMatrix::Slice(common::Span<int32_t const> ridxs) {
auto out = new SimpleDMatrix;
SparsePage& out_page = out->sparse_page_;
for (auto const &page : this->GetBatches<SparsePage>()) {
page.data.HostVector();
page.offset.HostVector();
auto batch = page.GetView();
auto& h_data = out_page.data.HostVector();
auto& h_offset = out_page.offset.HostVector();
size_t rptr{0};
for (auto ridx : ridxs) {
auto inst = page[ridx];
auto inst = batch[ridx];
rptr += inst.size();
std::copy(inst.begin(), inst.end(), std::back_inserter(h_data));
h_offset.emplace_back(rptr);
3 changes: 2 additions & 1 deletion src/gbm/gblinear.cc
@@ -173,9 +173,10 @@ class GBLinear : public GradientBooster {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
auto page = batch.GetView();
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto inst = batch[i];
auto inst = page[i];
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
// loop over output groups
for (int gid = 0; gid < ngroup; ++gid) {
5 changes: 3 additions & 2 deletions src/gbm/gbtree.cc
@@ -678,6 +678,7 @@ class Dart : public GBTree {
CHECK_EQ(preds.size(), p_fmat->Info().num_row_ * num_group);
// start collecting the prediction
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
auto page = batch.GetView();
constexpr int kUnroll = 8;
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
const bst_omp_uint rest = nsize % kUnroll;
@@ -692,7 +693,7 @@
ridx[k] = static_cast<int64_t>(batch.base_rowid + i + k);
}
for (int k = 0; k < kUnroll; ++k) {
inst[k] = batch[i + k];
inst[k] = page[i + k];
}
for (int k = 0; k < kUnroll; ++k) {
for (int gid = 0; gid < num_group; ++gid) {
@@ -707,7 +708,7 @@
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp_[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const SparsePage::Inst inst = batch[i];
const SparsePage::Inst inst = page[i];
for (int gid = 0; gid < num_group; ++gid) {
const size_t offset = ridx * num_group + gid;
preds[offset] +=
19 changes: 12 additions & 7 deletions src/linear/coordinate_common.h
@@ -82,7 +82,8 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto col = batch[fidx];
auto page = batch.GetView();
auto col = page[fidx];
const auto ndata = static_cast<bst_omp_uint>(col.size());
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_float v = col[j].fvalue;
@@ -111,7 +112,8 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
DMatrix *p_fmat) {
double sum_grad = 0.0, sum_hess = 0.0;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto col = batch[fidx];
auto page = batch.GetView();
auto col = page[fidx];
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)
for (bst_omp_uint j = 0; j < ndata; ++j) {
@@ -166,7 +168,8 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
DMatrix *p_fmat) {
if (dw == 0.0f) return;
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto col = batch[fidx];
auto page = batch.GetView();
auto col = page[fidx];
// update grad value
const auto num_row = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
@@ -334,9 +337,10 @@ class GreedyFeatureSelector : public FeatureSelector {
// Calculate univariate gradient sums
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nfeat; ++i) {
const auto col = batch[i];
const auto col = page[i];
const bst_uint ndata = col.size();
auto &sums = gpair_sums_[group_idx * nfeat + i];
for (bst_uint j = 0u; j < ndata; ++j) {
@@ -399,10 +403,11 @@ class ThriftyFeatureSelector : public FeatureSelector {
// Calculate univariate gradient sums
std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
// column-parallel is usually faster than row-parallel
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nfeat; ++i) {
const auto col = batch[i];
const auto col = page[i];
const bst_uint ndata = col.size();
for (bst_uint gid = 0u; gid < ngroup; ++gid) {
auto &sums = gpair_sums_[gid * nfeat + i];
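The comment in ThriftyFeatureSelector ("column-parallel is usually faster than row-parallel") is doing real work here: in a CSCPage each feature's entries are contiguous, so handing a thread whole columns keeps memory access streaming and makes each per-feature accumulator thread-private by construction. A self-contained toy version of that schedule (CSC layout assumed; not the XGBoost implementation):

    #include <cstddef>
    #include <vector>

    struct Entry { std::size_t index; float fvalue; };

    // Toy CSC matrix: entries of feature f live in [offset[f], offset[f+1]).
    // One thread owns feature f end-to-end, so sums[f] never races;
    // row-parallel traversal would need atomics or a reduction instead.
    std::vector<double> ColumnSums(std::vector<std::size_t> const &offset,
                                   std::vector<Entry> const &data) {
      std::vector<double> sums(offset.size() - 1, 0.0);
    #pragma omp parallel for schedule(static)
      for (long f = 0; f < static_cast<long>(sums.size()); ++f) {
        for (std::size_t j = offset[f]; j < offset[f + 1]; ++j) {
          sums[f] += data[j].fvalue;
        }
      }
      return sums;
    }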
5 changes: 3 additions & 2 deletions src/linear/updater_gpu_coordinate.cu
@@ -60,6 +60,7 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT

CHECK(p_fmat->SingleColBlock());
SparsePage const& batch = *(p_fmat->GetBatches<CSCPage>().begin());
auto page = batch.GetView();

if (IsEmpty()) {
return;
@@ -72,7 +73,7 @@
row_ptr_ = {0};
// iterate through columns
for (size_t fidx = 0; fidx < batch.Size(); fidx++) {
common::Span<Entry const> col = batch[fidx];
common::Span<Entry const> col = page[fidx];
auto cmp = [](Entry e1, Entry e2) {
return e1.index < e2.index;
};
@@ -89,7 +90,7 @@
data_.resize(row_ptr_.back());
gpair_.resize(num_row_ * model_param.num_output_group);
for (size_t fidx = 0; fidx < batch.Size(); fidx++) {
auto col = batch[fidx];
auto col = page[fidx];
auto seg = column_segments[fidx];
dh::safe_cuda(cudaMemcpy(
data_.data().get() + row_ptr_[fidx],
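The host-side loops above build one flat device buffer per page: the first pass sizes each sorted column, row_ptr_ accumulates the offsets, and the second pass copies each column into its segment so later kernels can slice a feature by offset. A small sketch of the offset-table construction this relies on (names mirror the code; the helper itself is hypothetical):

    #include <cstddef>
    #include <vector>

    // Exclusive prefix sum over per-column entry counts, as the updater
    // effectively computes: column f's entries end up occupying
    // data_[row_ptr[f] .. row_ptr[f + 1]) in the packed buffer.
    std::vector<std::size_t> BuildRowPtr(std::vector<std::size_t> const &col_sizes) {
      std::vector<std::size_t> row_ptr = {0};
      for (std::size_t n : col_sizes) {
        row_ptr.push_back(row_ptr.back() + n);
      }
      return row_ptr;
    }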
3 changes: 2 additions & 1 deletion src/linear/updater_shotgun.cc
@@ -52,6 +52,7 @@ class ShotgunUpdater : public LinearUpdater {
selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
for (const auto &batch : p_fmat->GetBatches<CSCPage>()) {
auto page = batch.GetView();
const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nfeat; ++i) {
@@ -60,7 +61,7 @@
param_.reg_lambda_denorm);
if (ii < 0) continue;
const bst_uint fid = ii;
auto col = batch[ii];
auto col = page[ii];
for (int gid = 0; gid < ngroup; ++gid) {
double sum_grad = 0.0, sum_hess = 0.0;
for (auto& c : col) {
10 changes: 6 additions & 4 deletions src/predictor/cpu_predictor.cc
@@ -360,18 +360,19 @@ class CPUPredictor : public Predictor {
// start collecting the prediction
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// parallel over local batch
auto page = batch.GetView();
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const int tid = omp_get_thread_num();
auto ridx = static_cast<size_t>(batch.base_rowid + i);
RegTree::FVec &feats = thread_temp_[tid];
feats.Fill(batch[i]);
feats.Fill(page[i]);
for (unsigned j = 0; j < ntree_limit; ++j) {
int tid = model.trees[j]->GetLeafIndex(feats);
preds[ridx * ntree_limit + j] = static_cast<bst_float>(tid);
}
feats.Drop(batch[i]);
feats.Drop(page[i]);
}
}
}
@@ -407,6 +408,7 @@ class CPUPredictor : public Predictor {
const std::vector<bst_float>& base_margin = info.base_margin_.HostVector();
// start collecting the contributions
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
auto page = batch.GetView();
// parallel over local batch
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
@@ -417,7 +419,7 @@
// loop over all classes
for (int gid = 0; gid < ngroup; ++gid) {
bst_float* p_contribs = &contribs[(row_idx * ngroup + gid) * ncolumns];
feats.Fill(batch[i]);
feats.Fill(page[i]);
// calculate contributions
for (unsigned j = 0; j < ntree_limit; ++j) {
std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
@@ -435,7 +437,7 @@
(tree_weights == nullptr ? 1 : (*tree_weights)[j]);
}
}
feats.Drop(batch[i]);
feats.Drop(page[i]);
// add base margin to BIAS
if (base_margin.size() != 0) {
p_contribs[ncolumns - 1] += base_margin[row_idx * ngroup + gid];
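The Fill/Drop pairing around page[i] in both predictor loops implements a per-thread dense scratch buffer: Fill scatters the sparse row into dense slots, the trees read it, and Drop clears only the slots that were touched, keeping the reset O(nnz) rather than O(num_feature). A toy stand-in for that protocol (the real RegTree::FVec lives in the tree code; the internals here are assumptions):

    #include <cmath>
    #include <cstddef>
    #include <limits>
    #include <vector>

    struct Entry { std::size_t index; float fvalue; };

    class DenseFeats {
     public:
      explicit DenseFeats(std::size_t n)
          : data_(n, std::numeric_limits<float>::quiet_NaN()) {}
      void Fill(std::vector<Entry> const &inst) {   // scatter sparse row
        for (auto const &e : inst) data_[e.index] = e.fvalue;
      }
      void Drop(std::vector<Entry> const &inst) {   // O(nnz) reset
        for (auto const &e : inst)
          data_[e.index] = std::numeric_limits<float>::quiet_NaN();
      }
      bool IsMissing(std::size_t i) const { return std::isnan(data_[i]); }

     private:
      std::vector<float> data_;  // NaN marks a missing feature value
    };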
11 changes: 7 additions & 4 deletions src/tree/updater_basemaker-inl.h
@@ -59,8 +59,9 @@ class BaseMaker: public TreeUpdater {
-std::numeric_limits<bst_float>::max());
// start accumulating statistics
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
auto page = batch.GetView();
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
auto c = batch[fid];
auto c = page[fid];
if (c.size() != 0) {
CHECK_LT(fid * 2, fminmax_.size());
fminmax_[fid * 2 + 0] =
@@ -249,8 +250,9 @@
inline void CorrectNonDefaultPositionByBatch(
const SparsePage &batch, const std::vector<bst_uint> &sorted_split_set,
const RegTree &tree) {
auto page = batch.GetView();
for (size_t fid = 0; fid < batch.Size(); ++fid) {
auto col = batch[fid];
auto col = page[fid];
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);

if (it != sorted_split_set.end() && *it == fid) {
@@ -308,10 +310,11 @@
std::vector<unsigned> fsplits;
this->GetSplitSet(nodes, tree, &fsplits);
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
auto page = batch.GetView();
for (auto fid : fsplits) {
auto col = batch[fid];
auto col = page[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue;
9 changes: 6 additions & 3 deletions src/tree/updater_colmaker.cc
@@ -77,8 +77,9 @@ class ColMaker: public TreeUpdater {
if (column_densities_.empty()) {
std::vector<size_t> column_size(dmat->Info().num_col_);
for (const auto &batch : dmat->GetBatches<SortedCSCPage>()) {
auto page = batch.GetView();
for (auto i = 0u; i < batch.Size(); i++) {
column_size[i] += batch[i].size();
column_size[i] += page[i].size();
}
}
column_densities_.resize(column_size.size());
@@ -447,13 +448,14 @@
#endif // defined(_OPENMP)
{
dmlc::OMPException omp_handler;
auto page = batch.GetView();
#pragma omp parallel for schedule(dynamic, batch_size)
for (bst_omp_uint i = 0; i < num_features; ++i) {
omp_handler.Run([&]() {
auto evaluator = tree_evaluator_.GetEvaluator();
bst_feature_t const fid = feat_set[i];
int32_t const tid = omp_get_thread_num();
auto c = batch[fid];
auto c = page[fid];
const bool ind =
c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
if (colmaker_train_param_.NeedForwardSearch(
@@ -562,8 +564,9 @@
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
auto page = batch.GetView();
for (auto fid : fsplits) {
auto col = batch[fid];
auto col = page[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
12 changes: 7 additions & 5 deletions src/tree/updater_histmaker.cc
@@ -338,14 +338,15 @@ class CQHistMaker: public HistMaker {
thread_hist_.resize(omp_get_max_threads());
// start accumulating statistics
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
auto page = batch.GetView();
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(fset.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
int fid = fset[i];
int offset = feat2workindex_[fid];
if (offset >= 0) {
this->UpdateHistCol(gpair, batch[fid], info, tree,
this->UpdateHistCol(gpair, page[fid], info, tree,
fset, offset,
&thread_hist_[omp_get_thread_num()]);
}
@@ -413,15 +414,15 @@
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);

auto page = batch.GetView();
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(work_set_.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
int fid = work_set_[i];
int offset = feat2workindex_[fid];
if (offset >= 0) {
this->UpdateSketchCol(gpair, batch[fid], tree,
this->UpdateSketchCol(gpair, page[fid], tree,
work_set_size, offset,
&thread_sketch_[omp_get_thread_num()]);
}
@@ -696,6 +697,7 @@ class GlobalProposalHistMaker: public CQHistMaker {
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
auto page = batch.GetView();

// start enumeration
const auto nsize = static_cast<bst_omp_uint>(this->work_set_.size());
@@ -704,7 +706,7 @@
int fid = this->work_set_[i];
int offset = this->feat2workindex_[fid];
if (offset >= 0) {
this->UpdateHistCol(gpair, batch[fid], info, tree,
this->UpdateHistCol(gpair, page[fid], info, tree,
fset, offset,
&this->thread_hist_[omp_get_thread_num()]);
}
