Skip to content

Commit

Permalink
Support for all primitive types from array. (#7003)
Browse files Browse the repository at this point in the history
* Change C API name.
* Test for all primitive types from array.
* Add native support for 128-bit float on CPU.
* Convert boolean and float16 in Python.

* Pin dask version for now.
  • Loading branch information
trivialfis committed Jun 1, 2021
1 parent 816b789 commit ee4f51a
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 24 deletions.
2 changes: 1 addition & 1 deletion include/xgboost/c_api.h
Expand Up @@ -142,7 +142,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
* \param out created dmatrix
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromArray(char const *data,
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
char const *json_config,
DMatrixHandle *out);

Expand Down
18 changes: 13 additions & 5 deletions python-package/xgboost/core.py
Expand Up @@ -239,7 +239,18 @@ def _array_interface(data: np.ndarray) -> bytes:
interface = data.__array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__array_interface__
interface_str = bytes(json.dumps(interface, indent=2), "utf-8")
interface_str = bytes(json.dumps(interface), "utf-8")
return interface_str


def _cuda_array_interface(data) -> bytes:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
interface = data.__cuda_array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__cuda_array_interface__
interface_str = bytes(json.dumps(interface), "utf-8")
return interface_str


Expand Down Expand Up @@ -1948,10 +1959,7 @@ def inplace_predict(
from .data import _transform_cupy_array

data = _transform_cupy_array(data)
interface = data.__cuda_array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__cuda_array_interface__
interface_str = bytes(json.dumps(interface, indent=2), "utf-8")
interface_str = _cuda_array_interface(data)
_check_call(
_LIB.XGBoosterPredictFromCudaArray(
self.handle,
Expand Down
17 changes: 8 additions & 9 deletions python-package/xgboost/data.py
Expand Up @@ -9,7 +9,8 @@

import numpy as np

from .core import c_array, _LIB, _check_call, c_str, _array_interface
from .core import c_array, _LIB, _check_call, c_str
from .core import _array_interface, _cuda_array_interface
from .core import DataIter, _ProxyDMatrix, DMatrix
from .compat import lazy_isinstance

Expand Down Expand Up @@ -105,7 +106,7 @@ def _is_numpy_array(data):


def _ensure_np_dtype(data, dtype):
if data.dtype.hasobject:
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
data = data.astype(np.float32, copy=False)
dtype = np.float32
return data, dtype
Expand Down Expand Up @@ -141,7 +142,7 @@ def _from_numpy_array(data, missing, nthread, feature_names, feature_types):
}
config = bytes(json.dumps(args), "utf-8")
_check_call(
_LIB.XGDMatrixCreateFromArray(
_LIB.XGDMatrixCreateFromDense(
_array_interface(data),
config,
ctypes.byref(handle),
Expand Down Expand Up @@ -416,21 +417,19 @@ def _is_cupy_array(data):


def _transform_cupy_array(data):
import cupy # pylint: disable=import-error
if not hasattr(data, '__cuda_array_interface__') and hasattr(
data, '__array__'):
import cupy # pylint: disable=import-error
data = cupy.array(data, copy=False)
if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
data = data.astype(cupy.float32, copy=False)
return data


def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
"""Initialize DMatrix from cupy ndarray."""
data = _transform_cupy_array(data)
interface = data.__cuda_array_interface__
if 'mask' in interface:
interface['mask'] = interface['mask'].__cuda_array_interface__
interface_str = bytes(json.dumps(interface, indent=2), 'utf-8')

interface_str = _cuda_array_interface(data)
handle = ctypes.c_void_p()
_check_call(
_LIB.XGDMatrixCreateFromArrayInterface(
Expand Down
2 changes: 1 addition & 1 deletion src/c_api/c_api.cc
Expand Up @@ -261,7 +261,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
API_END();
}

XGB_DLL int XGDMatrixCreateFromArray(char const *data,
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
char const *c_json_config,
DMatrixHandle *out) {
API_BEGIN();
Expand Down
29 changes: 23 additions & 6 deletions src/data/array_interface.h
Expand Up @@ -42,7 +42,8 @@ struct ArrayInterfaceErrors {
return str.c_str();
}
static char const* Version() {
return "Only version <= 3 of `__cuda_array_interface__' are supported.";
return "Only version <= 3 of "
"`__cuda_array_interface__/__array_interface__' are supported.";
}
static char const* OfType(std::string const& type) {
static std::string str;
Expand Down Expand Up @@ -81,7 +82,7 @@ struct ArrayInterfaceErrors {
return "Other";
default:
LOG(FATAL) << "Invalid type code: " << c << " in `typestr' of input array."
<< "\nPlease verify the `__cuda_array_interface__' "
<< "\nPlease verify the `__cuda_array_interface__/__array_interface__' "
<< "of your input data complies to: "
<< "https://docs.scipy.org/doc/numpy/reference/arrays.interface.html"
<< "\nOr open an issue.";
Expand All @@ -90,7 +91,7 @@ struct ArrayInterfaceErrors {
}

static std::string UnSupportedType(StringView typestr) {
return TypeStr(typestr[1]) + " is not supported.";
return TypeStr(typestr[1]) + "-" + typestr[2] + " is not supported.";
}
};

Expand Down Expand Up @@ -135,8 +136,9 @@ class ArrayInterfaceHandler {
if (array.find("typestr") == array.cend()) {
LOG(FATAL) << "Missing `typestr' field for array interface";
}

auto typestr = get<String const>(array.at("typestr"));
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();

if (array.find("shape") == array.cend()) {
Expand Down Expand Up @@ -295,7 +297,7 @@ class ArrayInterface {
}

public:
enum Type : std::int8_t { kF4, kF8, kI1, kI2, kI4, kI8, kU1, kU2, kU4, kU8 };
enum Type : std::int8_t { kF4, kF8, kF16, kI1, kI2, kI4, kI8, kU1, kU2, kU4, kU8 };

public:
ArrayInterface() = default;
Expand Down Expand Up @@ -331,7 +333,12 @@ class ArrayInterface {
}

void AssignType(StringView typestr) {
if (typestr[1] == 'f' && typestr[2] == '4') {
if (typestr.size() == 4 && typestr[1] == 'f' && typestr[2] == '1' &&
typestr[3] == '6') {
type = kF16;
CHECK(sizeof(long double) == 16)
<< "128-bit floating point is not supported on current platform.";
} else if (typestr[1] == 'f' && typestr[2] == '4') {
type = kF4;
} else if (typestr[1] == 'f' && typestr[2] == '8') {
type = kF8;
Expand Down Expand Up @@ -364,6 +371,16 @@ class ArrayInterface {
return func(reinterpret_cast<float *>(data));
case kF8:
return func(reinterpret_cast<double *>(data));
#ifdef __CUDA_ARCH__
case kF16: {
// CUDA device code doesn't support long double.
SPAN_CHECK(false);
return func(reinterpret_cast<double *>(data));
}
#else
case kF16:
return func(reinterpret_cast<long double *>(data));
#endif
case kI1:
return func(reinterpret_cast<int8_t *>(data));
case kI2:
Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/conda_env/macos_cpu_test.yml
Expand Up @@ -13,8 +13,8 @@ dependencies:
- scikit-learn
- pandas
- matplotlib
- dask
- distributed
- dask=2021.05.0
- distributed=2021.05.0
- graphviz
- python-graphviz
- hypothesis
Expand Down
58 changes: 58 additions & 0 deletions tests/python-gpu/test_gpu_prediction.py
Expand Up @@ -204,6 +204,7 @@ def predict_dense(x):
cpu_predt = reg.predict(X)
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
def test_inplace_predict_cudf(self):
import cupy as cp
Expand Down Expand Up @@ -332,6 +333,7 @@ def test_predict_categorical_split(self, df):
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.parametrize("n_classes", [2, 3])
def test_predict_dart(self, n_classes):
from sklearn.datasets import make_classification
Expand Down Expand Up @@ -378,3 +380,59 @@ def test_predict_dart(self, n_classes):

copied = cp.array(copied)
cp.testing.assert_allclose(inplace, copied, atol=1e-6)

@pytest.mark.skipif(**tm.no_cupy())
def test_dtypes(self):
    """In-place prediction on cupy inputs must not depend on the input dtype.

    A ``gpu_hist`` booster is trained on random integer data; the same values
    are then re-cast to each listed dtype and the predictions are compared with
    those obtained from the original array.  Complex inputs are expected to be
    rejected with ``ValueError``.
    """
    import cupy as cp

    n_rows, n_cols = 1000, 10
    generator = cp.random.RandomState(1994)
    features = generator.randint(
        low=0, high=127, size=n_rows * n_cols
    ).reshape(n_rows, n_cols)
    labels = generator.randint(low=0, high=127, size=n_rows)
    booster = xgb.train(
        {"tree_method": "gpu_hist"}, xgb.DMatrix(features, label=labels)
    )

    def check_matches_reference(source, dtypes):
        # Predictions from every re-cast copy must equal those of `source`.
        expected = booster.inplace_predict(source)
        for dtype in dtypes:
            converted = cp.array(source, dtype=dtype)
            cp.testing.assert_allclose(
                booster.inplace_predict(converted), expected
            )

    # integer and floating point types
    numeric_dtypes = (
        cp.signedinteger,
        cp.byte,
        cp.short,
        cp.intc,
        cp.int_,
        cp.longlong,
        cp.unsignedinteger,
        cp.ubyte,
        cp.ushort,
        cp.uintc,
        cp.uint,
        cp.ulonglong,
        cp.floating,
        cp.half,
        cp.single,
        cp.double,
    )
    check_matches_reference(features, numeric_dtypes)

    # boolean: draw fresh 0/1 data so the cast to bool is lossless
    binary = cp.random.binomial(1, 0.5, size=n_rows * n_cols).reshape(
        n_rows, n_cols
    )
    check_matches_reference(binary, (cp.bool8, cp.bool_))

    # unsupported types
    for dtype in (cp.complex64, cp.complex128):
        converted = cp.array(binary, dtype=dtype)
        with pytest.raises(ValueError):
            booster.inplace_predict(converted)
48 changes: 48 additions & 0 deletions tests/python/test_predict.py
Expand Up @@ -237,3 +237,51 @@ def test_base_margin(self):
dtrain = xgb.DMatrix(self.X, self.y, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
np.testing.assert_allclose(from_dmatrix, from_inplace)

def test_dtypes(self):
    """In-place prediction on numpy inputs must not depend on the input dtype.

    Uses ``self.rng``, ``self.rows``, ``self.cols`` and ``self.booster`` from
    the enclosing test class.  Random integer data is re-cast to each listed
    dtype and the predictions are compared with those obtained from the
    original array.  Byte-string and complex inputs are expected to be
    rejected with ``ValueError``.
    """
    orig = self.rng.randint(low=0, high=127, size=self.rows * self.cols).reshape(
        self.rows, self.cols
    )
    predt_orig = self.booster.inplace_predict(orig)
    # all primitive types in numpy
    for dtype in [
        np.signedinteger,
        np.byte,
        np.short,
        np.intc,
        np.int_,
        np.longlong,
        np.unsignedinteger,
        np.ubyte,
        np.ushort,
        np.uintc,
        np.uint,
        np.ulonglong,
        np.floating,
        np.half,
        np.single,
        np.double,
    ]:
        X = np.array(orig, dtype=dtype)
        predt = self.booster.inplace_predict(X)
        np.testing.assert_allclose(predt, predt_orig)

    # boolean
    orig = self.rng.binomial(1, 0.5, size=self.rows * self.cols).reshape(
        self.rows, self.cols
    )
    predt_orig = self.booster.inplace_predict(orig)
    # `np.bool8` was only an alias of `np.bool_` and no longer exists in
    # NumPy 2.0, so checking `np.bool_` alone keeps the same coverage.
    X = np.array(orig, dtype=np.bool_)
    predt = self.booster.inplace_predict(X)
    np.testing.assert_allclose(predt, predt_orig)

    # unsupported types
    for dtype in [
        np.bytes_,  # spelled `np.string_` before that alias was removed in NumPy 2.0
        np.complex64,
        np.complex128,
    ]:
        X = np.array(orig, dtype=dtype)
        with pytest.raises(ValueError):
            self.booster.inplace_predict(X)

0 comments on commit ee4f51a

Please sign in to comment.