Skip to content

Commit

Permalink
Small cleanup to jvm iter adapter. (#9616)
Browse files Browse the repository at this point in the history
- Remove the dependency of the `adapter.h` header on the C API (`c_api_error.h`).
- Remove the remaining Arrow-related code (`ColumnDType` and `Column`).
  • Loading branch information
trivialfis committed Sep 28, 2023
1 parent 417c3ba commit d95be1c
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 76 deletions.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ OBJECTS= \
$(PKGROOT)/src/gbm/gbtree_model.o \
$(PKGROOT)/src/gbm/gblinear.o \
$(PKGROOT)/src/gbm/gblinear_model.o \
$(PKGROOT)/src/data/adapter.o \
$(PKGROOT)/src/data/simple_dmatrix.o \
$(PKGROOT)/src/data/data.o \
$(PKGROOT)/src/data/sparse_page_raw_format.o \
Expand Down
1 change: 1 addition & 0 deletions R-package/src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ OBJECTS= \
$(PKGROOT)/src/gbm/gbtree_model.o \
$(PKGROOT)/src/gbm/gblinear.o \
$(PKGROOT)/src/gbm/gblinear_model.o \
$(PKGROOT)/src/data/adapter.o \
$(PKGROOT)/src/data/simple_dmatrix.o \
$(PKGROOT)/src/data/data.o \
$(PKGROOT)/src/data/sparse_page_raw_format.o \
Expand Down
4 changes: 2 additions & 2 deletions src/c_api/c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
if (cache_info != nullptr) {
scache = cache_info;
}
xgboost::data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,
XGBoostBatchCSR> adapter(data_handle, callback);
xgboost::data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR> adapter(
data_handle, callback);
xgboost_CHECK_C_ARG_PTR(out);
*out = new std::shared_ptr<DMatrix> {
DMatrix::Create(
Expand Down
28 changes: 28 additions & 0 deletions src/data/adapter.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* Copyright 2019-2023, XGBoost Contributors
*/
#include "adapter.h"

#include "../c_api/c_api_error.h" // for API_BEGIN, API_END
#include "xgboost/c_api.h"

namespace xgboost::data {
/**
 * @brief Ask the user-supplied callback for the next batch of data.
 *
 * The callback is invoked with the stored data handle, a trampoline function and `this`
 * as the trampoline's opaque handle. The trampoline forwards the received batch to
 * `SetData()` on this adapter.
 *
 * @return true if the callback returned a non-zero value, false if it returned zero.
 */
template <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>
bool IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>::Next() {
  // Captureless on purpose: the adapter instance travels through the `handle` argument
  // instead of a capture, so the lambda can be handed to the callback as a plain function.
  auto set_batch = [](void *handle, XGBoostBatchCSR batch) -> int {
    // API_BEGIN/API_END come from c_api_error.h and supply this lambda's int return value.
    API_BEGIN();
    static_cast<IteratorAdapter *>(handle)->SetData(batch);
    API_END();
  };

  if ((*next_callback_)(data_handle_, set_batch, this) == 0) {
    return false;
  }

  // Once the callback has reported a batch, the adapter is no longer at its first position.
  at_first_ = false;
  return true;
}

// Explicit instantiation for the types declared by the C API header.
template class IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext, XGBoostBatchCSR>;
}  // namespace xgboost::data
85 changes: 11 additions & 74 deletions src/data/adapter.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*!
* Copyright (c) 2019~2021 by Contributors
/**
* Copyright 2019-2023, XGBoost Contributors
* \file adapter.h
*/
#ifndef XGBOOST_DATA_ADAPTER_H_
Expand All @@ -16,7 +16,6 @@
#include <utility> // std::move
#include <vector>

#include "../c_api/c_api_error.h"
#include "../common/error_msg.h" // for MaxFeatureSize
#include "../common/math.h"
#include "array_interface.h"
Expand Down Expand Up @@ -742,8 +741,10 @@ class FileAdapter : dmlc::DataIter<FileAdapterBatch> {
dmlc::Parser<uint32_t>* parser_;
};

/*! \brief Data iterator that takes callback to return data, used in JVM package for
* accepting data iterator. */
/**
* @brief Data iterator that takes callback to return data, used in JVM package for accepting data
* iterator.
*/
template <typename DataIterHandle, typename XGBCallbackDataIterNext, typename XGBoostBatchCSR>
class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
public:
Expand All @@ -757,23 +758,9 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
CHECK(at_first_) << "Cannot reset IteratorAdapter";
}

bool Next() override {
if ((*next_callback_)(
data_handle_,
[](void *handle, XGBoostBatchCSR batch) -> int {
API_BEGIN();
static_cast<IteratorAdapter *>(handle)->SetData(batch);
API_END();
},
this) != 0) {
at_first_ = false;
return true;
} else {
return false;
}
}
[[nodiscard]] bool Next() override;

FileAdapterBatch const& Value() const override {
[[nodiscard]] FileAdapterBatch const& Value() const override {
return *batch_.get();
}

Expand Down Expand Up @@ -821,12 +808,12 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
block_.index = dmlc::BeginPtr(index_);
block_.value = dmlc::BeginPtr(value_);

batch_.reset(new FileAdapterBatch(&block_, row_offset_));
batch_ = std::make_unique<FileAdapterBatch>(&block_, row_offset_);
row_offset_ += offset_.size() - 1;
}

size_t NumColumns() const { return columns_; }
size_t NumRows() const { return kAdapterUnknownSize; }
[[nodiscard]] std::size_t NumColumns() const { return columns_; }
[[nodiscard]] std::size_t NumRows() const { return kAdapterUnknownSize; }

private:
std::vector<size_t> offset_;
Expand All @@ -848,56 +835,6 @@ class IteratorAdapter : public dmlc::DataIter<FileAdapterBatch> {
std::unique_ptr<FileAdapterBatch> batch_;
};

/**
 * @brief Data type tags for a column, stored in a single byte.
 *
 * The enumerators are numbered consecutively starting from zero; the explicit values
 * below spell out the numbering that implicit enumeration would produce.
 */
enum ColumnDType : uint8_t {
  kUnknown = 0,
  kInt8 = 1,
  kUInt8 = 2,
  kInt16 = 3,
  kUInt16 = 4,
  kInt32 = 5,
  kUInt32 = 6,
  kInt64 = 7,
  kUInt64 = 8,
  kFloat = 9,
  kDouble = 10
};

/**
 * @brief Abstract base class describing one column together with an optional validity bitmap.
 *
 * The class is neither copyable nor movable. Concrete subclasses provide element access
 * and conversion to flat vectors.
 */
class Column {
 public:
  /**
   * @brief Construct an empty column: zero index, zero length, zero null count and no
   *        validity bitmap (see the default member initializers below).
   */
  Column() = default;

  /**
   * @param col_idx    Index of this column.
   * @param length     Value reported by Length().
   * @param null_count Stored null count.
   * @param bitmap     Validity bitmap, or nullptr when every row is valid. The pointer is
   *                   stored as-is; the column does not copy the buffer.
   */
  Column(size_t col_idx, size_t length, size_t null_count, const uint8_t* bitmap)
      : col_idx_{col_idx}, length_{length}, null_count_{null_count}, bitmap_{bitmap} {}

  virtual ~Column() = default;

  Column(const Column&) = delete;
  Column& operator=(const Column&) = delete;
  Column(Column&&) = delete;
  Column& operator=(Column&&) = delete;

  /**
   * @brief Whether the valid bit is set for this element.
   *
   * Row `i` maps to bit `i % 8` (least significant bit first) of byte `i / 8` in the
   * bitmap. A null bitmap means every row is treated as valid.
   */
  bool IsValid(size_t row_idx) const {
    return (!bitmap_ || (bitmap_[row_idx/8] & (1 << (row_idx%8))));
  }

  /** @brief Element at the given row; implemented by subclasses. */
  virtual COOTuple GetElement(size_t row_idx) const = 0;

  /** @brief Validity check for the element at the given row; implemented by subclasses. */
  virtual bool IsValidElement(size_t row_idx) const = 0;

  /** @brief Column contents as a vector of float; implemented by subclasses. */
  virtual std::vector<float> AsFloatVector() const = 0;

  /** @brief Column contents as a vector of uint64_t; implemented by subclasses. */
  virtual std::vector<uint64_t> AsUint64Vector() const = 0;

  /** @brief Length passed at construction (zero for a default-constructed column). */
  size_t Length() const { return length_; }

 protected:
  // Default member initializers keep a default-constructed column in a well-defined state.
  // Without them the defaulted constructor leaves these members indeterminate, and
  // IsValid()/Length() would read uninitialized memory.
  size_t col_idx_{0};
  size_t length_{0};
  size_t null_count_{0};
  const uint8_t* bitmap_{nullptr};
};

class SparsePageAdapterBatch {
HostSparsePageView page_;

Expand Down

0 comments on commit d95be1c

Please sign in to comment.