From 5aa0e636e373117695c86ad44798d0c296d7a07b Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 11 Sep 2019 18:50:28 +0900 Subject: [PATCH 01/19] Add Tensor::Make functions These static factory functions validate the given arguments and then make a new Tensor object if the arguments are valid. --- cpp/src/arrow/tensor.cc | 71 +++++++++++++++++++++++++++++ cpp/src/arrow/tensor.h | 86 ++++++++++++++++++++++++++++++++---- cpp/src/arrow/tensor_test.cc | 66 +++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 3913876c9e25..dcc227bdd5cd 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -72,6 +72,77 @@ static void ComputeColumnMajorStrides(const FixedWidthType& type, } } +namespace { + +inline Status CheckTensorValidity(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape) { + if (!type) { + return Status::Invalid("Null type is supplied"); + } + if (!is_tensor_supported(type->id())) { + return Status::Invalid(type->ToString(), " is not valid data type for a tensor"); + } + if (!data) { + return Status::Invalid("Null data is supplied"); + } + if (shape.size() == 0) { + return Status::Invalid("Empty shape is supplied"); + } + return Status::OK(); +} + +Status CheckTensorStridesValidity(const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides) { + if (strides.size() != shape.size()) { + return Status::Invalid("strides must have the same length as shape"); + } + + std::vector last_index(shape); + const int64_t n = static_cast(shape.size()); + for (int64_t i = 0; i < n; ++i) { + --last_index[i]; + } + int64_t last_offset = Tensor::CalculateValueOffset(strides, last_index); + if (last_offset >= data->size()) { + return Status::Invalid("strides must not involve buffer over run"); + } + return Status::OK(); +} + +} // namespace + +Status Tensor::Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names, + std::shared_ptr* out) { + RETURN_NOT_OK(CheckTensorValidity(type, data, shape)); + if (!strides.empty()) { + RETURN_NOT_OK(CheckTensorStridesValidity(data, shape, strides)); + } + if (dim_names.size() > shape.size()) { + return Status::Invalid("too many dim_names are supplied"); + } + *out = std::make_shared(type, data, shape, strides, dim_names); + return Status::OK(); +} + +Status Tensor::Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, std::shared_ptr* out) { + return Make(type, data, shape, strides, {}, out); +} + +Status Tensor::Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, std::shared_ptr* out) { + return Make(type, data, shape, {}, {}, out); +} + /// Constructor with strides and dimension names Tensor::Tensor(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 8f30d8514574..4b929e953557 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -57,6 +57,71 @@ class SparseTensorImpl; class ARROW_EXPORT Tensor { public: + /// \brief Create a Tensor with full parameters + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] type The data type of the tensor values + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[in] strides The strides of the tensor + /// (if this is empty, the data assumed to be row-major) + /// \param[in] dim_names The names of the tensor dimensions + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names, + std::shared_ptr* out); + + /// \brief Create a Tensor with full parameters with empty dim_names + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] type The data type of the tensor values + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[in] strides The strides of the tensor + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, std::shared_ptr* out); + + /// \brief Create a Tensor with full parameters with empty strides, the data assumed to be row-major + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] type The data type of the tensor values + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[in] dim_names The names of the tensor dimensions + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& dim_names, std::shared_ptr* out) { + return Make(type, data, shape, {}, dim_names, out); + } + + /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed + /// to be row-major + /// + /// This factory function will return Status::Invalid when the + /// parameters are inconsistent + /// + /// \param[in] type The data type of the tensor values + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, std::shared_ptr* out); + virtual ~Tensor() = default; /// Constructor with no dimension names or strides, data assumed to be row-major @@ -108,11 +173,22 @@ class ARROW_EXPORT Tensor { /// Compute the number of non-zero values in the tensor Status CountNonZero(int64_t* result) const; + /// Return the offset of the given index on the given strides + static int64_t CalculateValueOffset(const std::vector& strides, + const std::vector& index) { + const int64_t n = static_cast(index.size()); + int64_t offset = 0; + for (int64_t i = 0; i < n; ++i) { + offset += index[i] * strides[i]; + } + return offset; + } + /// Returns the value at the given index without data-type and bounds checks template const typename ValueType::c_type& Value(const std::vector& index) const { using c_type = typename ValueType::c_type; - const int64_t offset = CalculateValueOffset(index); + const int64_t offset = Tensor::CalculateValueOffset(strides_, index); const c_type* ptr = reinterpret_cast(raw_data() + offset); return *ptr; } @@ -120,14 +196,6 @@ class ARROW_EXPORT Tensor { protected: Tensor() {} - int64_t CalculateValueOffset(const std::vector& index) const { - int64_t offset = 0; - for (size_t i = 0; i < index.size(); ++i) { - offset += index[i] * strides_[i]; - } - return offset; - } - std::shared_ptr type_; std::shared_ptr data_; std::vector shape_; diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index d752b6d22fd7..19c6834de180 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -38,6 +38,72 @@ void AssertCountNonZero(const Tensor& t, int64_t expected) { ASSERT_EQ(count, expected); } +TEST(TestTensor, Make) { + // without strides and dim_names + std::vector shape = {3, 6}; + std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto data = Buffer::Wrap(values); + std::shared_ptr tensor1; + ASSERT_OK(Tensor::Make(float64(), data, shape, &tensor1)); + + // without dim_names + std::vector strides = {sizeof(double) * 6, sizeof(double)}; + std::shared_ptr tensor2; + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, &tensor2)); + EXPECT_TRUE(tensor2->Equals(*tensor1)); + + // without strides + std::vector dim_names = {"foo", "bar"}; + std::shared_ptr tensor3; + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, dim_names, &tensor3)); + EXPECT_TRUE(tensor3->Equals(*tensor1)); + EXPECT_TRUE(tensor3->Equals(*tensor2)); + + // supply all parameters + std::shared_ptr tensor4; + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names, &tensor4)); + EXPECT_TRUE(tensor4->Equals(*tensor1)); + EXPECT_TRUE(tensor4->Equals(*tensor2)); + EXPECT_TRUE(tensor4->Equals(*tensor3)); +} + +TEST(TestTensor, MakeFailureCases) { + std::vector shape = {3, 6}; + std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto data = Buffer::Wrap(values); + std::shared_ptr tensor1; + + // null type + ASSERT_RAISES(Invalid, Tensor::Make(nullptr, data, shape, &tensor1)); + + // invalid type + ASSERT_RAISES(Invalid, Tensor::Make(binary(), data, shape, &tensor1)); + + // null data + ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape, &tensor1)); + + // empty shape + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {}, &tensor1)); + + // invalid stride length + ASSERT_RAISES(Invalid, + Tensor::Make(float64(), data, shape, {sizeof(double)}, &tensor1)); + ASSERT_RAISES(Invalid, + Tensor::Make(float64(), data, shape, + {sizeof(double), sizeof(double), sizeof(double)}, &tensor1)); + + // invalid stride values to involve buffer over run + ASSERT_RAISES(Invalid, + Tensor::Make(float64(), data, shape, + {sizeof(double) * 6, sizeof(double) * 2}, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double) * 12, sizeof(double)}, &tensor1)); + + // too many dim_names are supplied + ASSERT_RAISES( + Invalid, Tensor::Make(float64(), data, shape, {}, {"foo", "bar", "baz"}, &tensor1)); +} + TEST(TestTensor, ZeroDim) { const int64_t values = 1; std::vector shape = {}; From b9fc3c96060f709277c0d46cca8f47e981f4722a Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Thu, 31 Oct 2019 17:48:12 +0900 Subject: [PATCH 02/19] Extract internal::ValidateTensorParameters --- cpp/src/arrow/tensor.cc | 27 ++++++++------------------- cpp/src/arrow/tensor.h | 31 ++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index dcc227bdd5cd..4b2f769bec8c 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -113,12 +113,13 @@ Status CheckTensorStridesValidity(const std::shared_ptr& data, } // namespace -Status Tensor::Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names, - std::shared_ptr* out) { +namespace internal { + +Status ValidateTensorParameters(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names) { RETURN_NOT_OK(CheckTensorValidity(type, data, shape)); if (!strides.empty()) { RETURN_NOT_OK(CheckTensorStridesValidity(data, shape, strides)); @@ -126,22 +127,10 @@ Status Tensor::Make(const std::shared_ptr& type, if (dim_names.size() > shape.size()) { return Status::Invalid("too many dim_names are supplied"); } - *out = std::make_shared(type, data, shape, strides, dim_names); return Status::OK(); } -Status Tensor::Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, std::shared_ptr* out) { - return Make(type, data, shape, strides, {}, out); -} - -Status Tensor::Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, std::shared_ptr* out) { - return Make(type, data, shape, {}, {}, out); -} +} // namespace internal /// Constructor with strides and dimension names Tensor::Tensor(const std::shared_ptr& type, const std::shared_ptr& data, diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 4b929e953557..5ca0adfe11ce 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -55,6 +55,16 @@ static inline bool is_tensor_supported(Type::type type_id) { template class SparseTensorImpl; +namespace internal { + +Status ValidateTensorParameters(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names); + +} + class ARROW_EXPORT Tensor { public: /// \brief Create a Tensor with full parameters @@ -74,7 +84,12 @@ class ARROW_EXPORT Tensor { const std::vector& shape, const std::vector& strides, const std::vector& dim_names, - std::shared_ptr* out); + std::shared_ptr* out) { + ARROW_RETURN_NOT_OK( + internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); + *out = std::make_shared(type, data, shape, strides, dim_names); + return Status::OK(); + } /// \brief Create a Tensor with full parameters with empty dim_names /// @@ -89,9 +104,12 @@ class ARROW_EXPORT Tensor { static Status Make(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, - const std::vector& strides, std::shared_ptr* out); + const std::vector& strides, std::shared_ptr* out) { + return Make(type, data, shape, strides, {}, out); + } - /// \brief Create a Tensor with full parameters with empty strides, the data assumed to be row-major + /// \brief Create a Tensor with full parameters with empty strides, the data assumed to + /// be row-major /// /// This factory function will return Status::Invalid when the parameters are /// inconsistent @@ -104,7 +122,8 @@ class ARROW_EXPORT Tensor { static Status Make(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, - const std::vector& dim_names, std::shared_ptr* out) { + const std::vector& dim_names, + std::shared_ptr* out) { return Make(type, data, shape, {}, dim_names, out); } @@ -120,7 +139,9 @@ class ARROW_EXPORT Tensor { /// \param[out] out The result tensor static Status Make(const std::shared_ptr& type, const std::shared_ptr& data, - const std::vector& shape, std::shared_ptr* out); + const std::vector& shape, std::shared_ptr* out) { + return Make(type, data, shape, {}, {}, out); + } virtual ~Tensor() = default; From aec0ac5cdad198400d6a77880bd0973c326018b9 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Fri, 15 Nov 2019 17:31:20 +0900 Subject: [PATCH 03/19] Remove Tensor::Make without strides --- cpp/src/arrow/tensor.h | 19 ------------------- cpp/src/arrow/tensor_test.cc | 2 +- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 5ca0adfe11ce..dc3bdbee7b25 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -108,25 +108,6 @@ class ARROW_EXPORT Tensor { return Make(type, data, shape, strides, {}, out); } - /// \brief Create a Tensor with full parameters with empty strides, the data assumed to - /// be row-major - /// - /// This factory function will return Status::Invalid when the parameters are - /// inconsistent - /// - /// \param[in] type The data type of the tensor values - /// \param[in] data The buffer of the tensor content - /// \param[in] shape The shape of the tensor - /// \param[in] dim_names The names of the tensor dimensions - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& dim_names, - std::shared_ptr* out) { - return Make(type, data, shape, {}, dim_names, out); - } - /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed /// to be row-major /// diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 19c6834de180..8025f98bd56e 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -55,7 +55,7 @@ TEST(TestTensor, Make) { // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr tensor3; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, dim_names, &tensor3)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names, &tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); From 9a2bfaf08770ef09d95a5ad83464bbdaacf6bbf1 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Thu, 31 Oct 2019 18:20:52 +0900 Subject: [PATCH 04/19] Add NumericTensor::Make functions These static factory functions validate the given arguments and then make a new NumericTensor object if the arguments are valid. --- cpp/src/arrow/tensor.h | 53 ++++++++++++++++++++++++++++++++++++ cpp/src/arrow/tensor_test.cc | 29 ++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index dc3bdbee7b25..d3b2d9d81905 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -219,6 +219,59 @@ class NumericTensor : public Tensor { using TypeClass = TYPE; using value_type = typename TypeClass::c_type; + /// \brief Create a NumericTensor with full parameters + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[in] strides The strides of the tensor + /// (if this is empty, the data assumed to be row-major) + /// \param[in] dim_names The names of the tensor dimensions + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names, + std::shared_ptr>* out) { + ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( + TypeTraits::type_singleton(), data, shape, strides, dim_names)); + *out = std::make_shared>(data, shape, strides, dim_names); + return Status::OK(); + } + + /// \brief Create a NumericTensor with full parameters with empty dim_names + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[in] strides The strides of the tensor + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + std::shared_ptr>* out) { + return Make(data, shape, strides, {}, out); + } + + /// \brief Create a NumericTensor with full parameters with empty strides and empty + /// dim_names, the data assumed to be row-major + /// + /// This factory function will return Status::Invalid when the parameters are + /// inconsistent + /// + /// \param[in] data The buffer of the tensor content + /// \param[in] shape The shape of the tensor + /// \param[out] out The result tensor + static Status Make(const std::shared_ptr& data, + const std::vector& shape, + std::shared_ptr>* out) { + return Make(data, shape, {}, {}, out); + } + /// Constructor with non-negative strides and dimension names NumericTensor(const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 8025f98bd56e..9627160e5e8c 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -424,6 +424,35 @@ REGISTER_TYPED_TEST_CASE_P(TestFloatTensor, Equals); INSTANTIATE_TYPED_TEST_CASE_P(Float32, TestFloatTensor, FloatType); INSTANTIATE_TYPED_TEST_CASE_P(Float64, TestFloatTensor, DoubleType); +TEST(TestNumericTensor, Make) { + // without strides and dim_names + std::vector shape = {3, 6}; + std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto data = Buffer::Wrap(values); + std::shared_ptr> tensor1; + ASSERT_OK(NumericTensor::Make(data, shape, &tensor1)); + + // without dim_names + std::vector strides = {sizeof(double) * 6, sizeof(double)}; + std::shared_ptr> tensor2; + ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides, &tensor2)); + EXPECT_TRUE(tensor2->Equals(*tensor1)); + + // without strides + std::vector dim_names = {"foo", "bar"}; + std::shared_ptr> tensor3; + ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names, &tensor3)); + EXPECT_TRUE(tensor3->Equals(*tensor1)); + EXPECT_TRUE(tensor3->Equals(*tensor2)); + + // supply all parameters + std::shared_ptr> tensor4; + ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides, dim_names, &tensor4)); + EXPECT_TRUE(tensor4->Equals(*tensor1)); + EXPECT_TRUE(tensor4->Equals(*tensor2)); + EXPECT_TRUE(tensor4->Equals(*tensor3)); +} + TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { std::vector shape = {3, 4}; From a7ac21dd9194c17aadc0917efe60595041dfe2ab Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Mon, 18 Nov 2019 18:20:53 +0900 Subject: [PATCH 05/19] Add SparseTensorImpl::Make --- cpp/src/arrow/sparse_tensor.h | 48 +++++++++++- cpp/src/arrow/sparse_tensor_test.cc | 117 +++++++++++++++++++++++++++- 2 files changed, 160 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 259284a5b400..67a26928c5cb 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -120,6 +120,15 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBaseEquals(*other.indices()); } + Status ValidateShape(const std::vector& shape) const { + if (static_cast(coords_->shape()[1]) == shape.size()) { + return Status::OK(); + } + + return Status::Invalid( + "shape length is inconsistent with the coords matrix in COO index"); + } + protected: std::shared_ptr coords_; }; @@ -176,6 +185,23 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBaseEquals(*other.indptr()) && indices()->Equals(*other.indices()); } + Status ValidateShape(const std::vector& shape) const { + if (shape.size() < 1) { + return Status::Invalid("shape length is too short"); + } + + if (shape.size() > 2) { + return Status::Invalid("shape length is too long"); + } + + if (indptr_->shape()[0] == shape[0] + 1) { + return Status::OK(); + } + + return Status::Invalid( + "shape length is inconsistent with the coords matrix in COO index"); + } + protected: std::shared_ptr indptr_; std::shared_ptr indices_; @@ -292,7 +318,27 @@ class SparseTensorImpl : public SparseTensor { const std::vector& dim_names = {}) : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {} - /// \brief Construct a sparse tensor from a dense tensor + /// \brief Create a SparseTensor with full parameters + static Status Make(const std::shared_ptr& sparse_index, + const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr>* out) { + if (!is_tensor_supported(type->id())) { + return Status::Invalid(type->ToString(), + " is not valid data type for a sparse tensor"); + } + ARROW_RETURN_NOT_OK(sparse_index->ValidateShape(shape)); + if (dim_names.size() > 0 && dim_names.size() != shape.size()) { + return Status::Invalid("dim_names length is inconsistent with shape"); + } + *out = std::make_shared>(sparse_index, type, data, + shape, dim_names); + return Status::OK(); + } + + /// \brief Create a sparse tensor from a dense tensor /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index e9788bc2d053..366885630b84 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -255,6 +255,50 @@ class TestSparseCOOTensorForIndexValueType TYPED_TEST_CASE_P(TestSparseCOOTensorForIndexValueType); +TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { + using IndexValueType = TypeParam; + using c_index_value_type = typename IndexValueType::c_type; + + // Sparse representation: + // idx[0] = [0 0 0 0 0 0 1 1 1 1 1 1] + // idx[1] = [0 0 1 1 2 2 0 0 1 1 2 2] + // idx[2] = [0 2 1 3 0 2 1 3 0 2 1 3] + // data = [1 2 3 4 5 6 11 12 13 14 15 16] + std::vector coords_values = {0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 3, + 0, 2, 0, 0, 2, 2, 1, 0, 1, 1, 0, 3, + 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; + constexpr int sizeof_index_value = sizeof(c_index_value_type); + auto si = this->MakeSparseCOOIndex( + {12, 3}, {sizeof_index_value * 3, sizeof_index_value}, coords_values); + + std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; + auto sparse_data = Buffer::Wrap(sparse_values); + + std::shared_ptr st; + + // OK + ASSERT_OK(SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, + this->dim_names_, &st)); + + // OK with an empty dim_names + ASSERT_OK(SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}, &st)); + + // invalid data type + ASSERT_RAISES(Invalid, + SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}, &st)); + + // empty shape + ASSERT_RAISES(Invalid, SparseCOOTensor::Make(si, int64(), sparse_data, {}, {}, &st)); + + // sparse index and ndim (shape length) are inconsistent + ASSERT_RAISES(Invalid, + SparseCOOTensor::Make(si, int64(), sparse_data, {6, 4}, {}, &st)); + + // shape and dim_names are inconsistent + ASSERT_RAISES(Invalid, SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, + std::vector{"foo"}, &st)); +} + TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, CreationWithRowMajorIndex) { using IndexValueType = TypeParam; using c_index_value_type = typename IndexValueType::c_type; @@ -267,7 +311,7 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, CreationWithRowMajorIndex) { std::vector coords_values = {0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 3, 0, 2, 0, 0, 2, 2, 1, 0, 1, 1, 0, 3, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; - const int sizeof_index_value = sizeof(c_index_value_type); + constexpr int sizeof_index_value = sizeof(c_index_value_type); auto si = this->MakeSparseCOOIndex( {12, 3}, {sizeof_index_value * 3, sizeof_index_value}, coords_values); @@ -294,7 +338,7 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, CreationWithColumnMajorIndex) std::vector coords_values = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; - const int sizeof_index_value = sizeof(c_index_value_type); + constexpr int sizeof_index_value = sizeof(c_index_value_type); auto si = this->MakeSparseCOOIndex( {12, 3}, {sizeof_index_value, sizeof_index_value * 12}, coords_values); @@ -322,7 +366,7 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, // Row-major COO index const std::vector coords_shape = {12, 3}; - const int sizeof_index_value = sizeof(c_index_value_type); + constexpr int sizeof_index_value = sizeof(c_index_value_type); std::vector coords_values_row_major = { 0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 3, 0, 2, 0, 0, 2, 2, 1, 0, 1, 1, 0, 3, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; @@ -347,7 +391,7 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, ASSERT_TRUE(st2->Equals(*st1)); } -REGISTER_TYPED_TEST_CASE_P(TestSparseCOOTensorForIndexValueType, +REGISTER_TYPED_TEST_CASE_P(TestSparseCOOTensorForIndexValueType, Make, CreationWithRowMajorIndex, CreationWithColumnMajorIndex, EqualityBetweenRowAndColumnMajorIndices); @@ -519,4 +563,69 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor)); ASSERT_TRUE(tensor.Equals(*dense_tensor)); } + +template +class TestSparseCSRMatrixForIndexValueType + : public TestSparseCSRMatrixBase {}; + +TYPED_TEST_CASE_P(TestSparseCSRMatrixForIndexValueType); + +TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { + using IndexValueType = TypeParam; + using c_index_value_type = typename IndexValueType::c_type; + + // Sparse representation: + std::vector indptr_values = {0, 2, 4, 6, 8, 10, 12}; + std::vector indices_values = {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; + + std::vector indptr_shape = {7}; + std::vector indices_shape = {12}; + + std::shared_ptr si; + ASSERT_OK(SparseCSRIndex::Make(TypeTraits::type_singleton(), + indptr_shape, indices_shape, Buffer::Wrap(indptr_values), + Buffer::Wrap(indices_values), &si)); + + std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; + auto sparse_data = Buffer::Wrap(sparse_values); + + std::shared_ptr sm; + + // OK + ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, + this->dim_names_, &sm)); + + // OK with an empty dim_names + ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, {}, &sm)); + + // invalid data type + ASSERT_RAISES(Invalid, + SparseCSRMatrix::Make(si, binary(), sparse_data, this->shape_, {}, &sm)); + + // empty shape + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {}, &sm)); + + // sparse index and ndim (shape length) are inconsistent + ASSERT_RAISES(Invalid, + SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {}, &sm)); + + // shape and dim_names are inconsistent + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, + std::vector{"foo"}, &sm)); +} + +REGISTER_TYPED_TEST_CASE_P(TestSparseCSRMatrixForIndexValueType, Make); + +INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestSparseCSRMatrixForIndexValueType, Int8Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestSparseCSRMatrixForIndexValueType, UInt8Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestSparseCSRMatrixForIndexValueType, Int16Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16, TestSparseCSRMatrixForIndexValueType, + UInt16Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestSparseCSRMatrixForIndexValueType, Int32Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestSparseCSRMatrixForIndexValueType, + UInt32Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestSparseCSRMatrixForIndexValueType, Int64Type); +INSTANTIATE_TYPED_TEST_CASE_P(TestUInt64, TestSparseCSRMatrixForIndexValueType, + UInt64Type); + } // namespace arrow From 1a2b5cad3cc949d0d204219c383a72caf2335940 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 19 Nov 2019 15:59:18 +0900 Subject: [PATCH 06/19] Reject negative items in shape --- cpp/src/arrow/sparse_tensor.cc | 12 ++++++++++++ cpp/src/arrow/sparse_tensor.h | 16 +++++++++++----- cpp/src/arrow/sparse_tensor_test.cc | 12 ++++++++++-- cpp/src/arrow/tensor.cc | 4 ++++ cpp/src/arrow/tensor_test.cc | 3 +++ 5 files changed, 40 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index ef7a986bf058..957192c5845f 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -17,6 +17,7 @@ #include "arrow/sparse_tensor.h" +#include #include #include #include @@ -28,6 +29,17 @@ namespace arrow { +// ---------------------------------------------------------------------- +// SparseIndex + +Status SparseIndex::ValidateShape(const std::vector& shape) const { + if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x > 0; })) { + return Status::Invalid("Shape elements must be positive"); + } + + return Status::OK(); +} + namespace { // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 67a26928c5cb..40134588ac3c 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -63,6 +63,8 @@ class ARROW_EXPORT SparseIndex { /// \brief Return the string representation of the sparse index virtual std::string ToString() const = 0; + virtual Status ValidateShape(const std::vector& shape) const; + protected: SparseTensorFormat::type format_id_; int64_t non_zero_length_; @@ -120,7 +122,9 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBaseEquals(*other.indices()); } - Status ValidateShape(const std::vector& shape) const { + Status ValidateShape(const std::vector& shape) const override { + ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape)); + if (static_cast(coords_->shape()[1]) == shape.size()) { return Status::OK(); } @@ -185,16 +189,18 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBaseEquals(*other.indptr()) && indices()->Equals(*other.indices()); } - Status ValidateShape(const std::vector& shape) const { - if (shape.size() < 1) { - return Status::Invalid("shape length is too short"); + Status ValidateShape(const std::vector& shape) const override { + ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape)); + + if (shape.size() < 2) { + return Status::Invalid("shape length is too long"); } if (shape.size() > 2) { return Status::Invalid("shape length is too long"); } - if (indptr_->shape()[0] == shape[0] + 1) { + if (shape.size() == 0 || indptr_->shape()[0] == shape[0] + 1) { return Status::OK(); } diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 366885630b84..16af9c8dc1fb 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -287,8 +287,9 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { ASSERT_RAISES(Invalid, SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}, &st)); - // empty shape - ASSERT_RAISES(Invalid, SparseCOOTensor::Make(si, int64(), sparse_data, {}, {}, &st)); + // negative items in shape + ASSERT_RAISES(Invalid, + SparseCOOTensor::Make(si, int64(), sparse_data, {2, -3, 4}, {}, &st)); // sparse index and ndim (shape length) are inconsistent ASSERT_RAISES(Invalid, @@ -605,6 +606,13 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { // empty shape ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {}, &sm)); + // 1D shape + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {24}, {}, &sm)); + + // negative items in shape + ASSERT_RAISES(Invalid, + SparseCSRMatrix::Make(si, int64(), sparse_data, {6, -4}, {}, &sm)); + // sparse index and ndim (shape length) are inconsistent ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {}, &sm)); diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 4b2f769bec8c..f0a850c06910 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -17,6 +17,7 @@ #include "arrow/tensor.h" +#include #include #include #include @@ -89,6 +90,9 @@ inline Status CheckTensorValidity(const std::shared_ptr& type, if (shape.size() == 0) { return Status::Invalid("Empty shape is supplied"); } + if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x > 0; })) { + return Status::Invalid("Shape elements must be positive"); + } return Status::OK(); } diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 9627160e5e8c..0374911a19d7 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -85,6 +85,9 @@ TEST(TestTensor, MakeFailureCases) { // empty shape ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {}, &tensor1)); + // negative items in shape + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6}, &tensor1)); + // invalid stride length ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {sizeof(double)}, &tensor1)); From bf1a474ce48a2ed54b70b0f434e981f1b5c5117e Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 19 Nov 2019 20:50:23 +0900 Subject: [PATCH 07/19] SparseCOOIndex::Make and SparseCSRIndex::Make --- cpp/src/arrow/sparse_tensor.cc | 38 +++++++++++++++++---- cpp/src/arrow/sparse_tensor.h | 31 +++++++++++++++--- cpp/src/arrow/sparse_tensor_test.cc | 51 +++++++++++++++++++++++++++++ cpp/src/arrow/tensor.cc | 39 +++++++++++++++++----- cpp/src/arrow/tensor.h | 8 ++++- 5 files changed, 146 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 957192c5845f..9a8c72cc77cc 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -515,17 +515,27 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t // ---------------------------------------------------------------------- // SparseCOOIndex -Status SparseCOOIndex::Make(std::shared_ptr indices_type, +Status SparseCOOIndex::Make(const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data, std::shared_ptr* out) { + if (!is_integer(indices_type->id())) { + return Status::Invalid("Type of SparseCOOIndex indices must be integer"); + } + if (indices_shape.size() != 2) { + return Status::Invalid("SparseCOOIndex indices must be a matrix"); + } + if (!internal::IsTensorStridesContiguous(indices_type, indices_shape, + indices_strides)) { + return Status::Invalid("SparseCOOIndex indices must be contiguous"); + } *out = std::make_shared(std::make_shared( indices_type, indices_data, indices_shape, indices_strides)); return Status::OK(); } -Status SparseCOOIndex::Make(std::shared_ptr indices_type, +Status SparseCOOIndex::Make(const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data, std::shared_ptr* out) { @@ -550,27 +560,41 @@ std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOInde // ---------------------------------------------------------------------- // SparseCSRIndex -Status SparseCSRIndex::Make(const std::shared_ptr indices_type, +Status SparseCSRIndex::Make(const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data, std::shared_ptr* out) { + if (!is_integer(indptr_type->id())) { + return Status::Invalid("Type of SparseCSRIndex indptr must be integer"); + } + if (indptr_shape.size() != 1) { + return Status::Invalid("SparseCSRIndex indptr must be a vector"); + } + if (!is_integer(indices_type->id())) { + return Status::Invalid("Type of SparseCSRIndex indices must be integer"); + } + if (indices_shape.size() != 1) { + return Status::Invalid("SparseCSRIndex indices must be a vector"); + } *out = std::make_shared( - std::make_shared(indices_type, indptr_data, indptr_shape), + std::make_shared(indptr_type, indptr_data, indptr_shape), std::make_shared(indices_type, indices_data, indices_shape)); return Status::OK(); } -Status SparseCSRIndex::Make(const std::shared_ptr indices_type, +Status SparseCSRIndex::Make(const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data, std::shared_ptr* out) { std::vector indptr_shape({shape[0] + 1}); std::vector indices_shape({non_zero_length}); - return SparseCSRIndex::Make(indices_type, indptr_shape, indices_shape, indptr_data, - indices_data, out); + return SparseCSRIndex::Make(indptr_type, indices_type, indptr_shape, indices_shape, + indptr_data, indices_data, out); } // Constructor with two index vectors diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 40134588ac3c..d131b552deaf 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -91,14 +91,14 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase indices_type, + static Status Make(const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data, std::shared_ptr* out); /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data - static Status Make(const std::shared_ptr indices_type, + static Status Make(const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data, std::shared_ptr* out); @@ -157,20 +157,43 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase indices_type, + static Status Make(const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data, std::shared_ptr* out); + /// \brief Make SparseCSRIndex from raw properties + static Status Make(const std::shared_ptr& indices_type, + const std::vector& indptr_shape, + const std::vector& indices_shape, + std::shared_ptr indptr_data, + std::shared_ptr indices_data, + std::shared_ptr* out) { + return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, + indices_data, out); + } + /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Status Make(const std::shared_ptr indices_type, + static Status Make(const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data, std::shared_ptr* out); + /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data + static Status Make(const std::shared_ptr& indices_type, + const std::vector& shape, int64_t non_zero_length, + std::shared_ptr indptr_data, + std::shared_ptr indices_data, + std::shared_ptr* out) { + return Make(indices_type, indices_type, shape, non_zero_length, indptr_data, + indices_data, out); + } + /// \brief Construct SparseCSRIndex from two index vectors explicit SparseCSRIndex(const std::shared_ptr& indptr, const std::shared_ptr& indices); diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 16af9c8dc1fb..e4faa390cde8 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -47,6 +47,57 @@ static inline void AssertCOOIndex(const std::shared_ptr& sidx, const int } } +TEST(TestSparseCOOIndex, Make) { + std::vector values = {0, 0, 0, 0, 0, 2, 0, 1, 1, 0, 1, 3, 0, 2, 0, 0, 2, 2, + 1, 0, 1, 1, 0, 3, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; + auto data = Buffer::Wrap(values); + std::vector shape = {12, 3}; + std::vector stride = {3 * sizeof(int32_t), sizeof(int32_t)}; + + std::shared_ptr si; + + // OK + ASSERT_OK(SparseCOOIndex::Make(int32(), shape, stride, data, &si)); + + // Non-integer type + ASSERT_RAISES(Invalid, SparseCOOIndex::Make(float32(), shape, stride, data, &si)); + + // Non-matrix indices + ASSERT_RAISES(Invalid, SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data, &si)); + + // Non-contiguous indices + ASSERT_RAISES(Invalid, SparseCOOIndex::Make(int32(), {6, 3}, + {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, + data, &si)); +} + +TEST(TestSparseCSRIndex, Make) { + std::vector indptr_values = {0, 2, 4, 6, 8, 10, 12}; + std::vector indices_values = {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; + auto indptr_data = Buffer::Wrap(indptr_values); + auto indices_data = Buffer::Wrap(indices_values); + std::vector indptr_shape = {7}; + std::vector indices_shape = {12}; + + std::shared_ptr si; + + // OK + ASSERT_OK(SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, + indices_data, &si)); + + // Non-integer type + ASSERT_RAISES(Invalid, SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, + indptr_data, indices_data, &si)); + + // Non-vector indptr shape + ASSERT_RAISES(Invalid, SparseCSRIndex::Make(int32(), {1, 2}, indices_shape, indptr_data, + indices_data, &si)); + + // Non-vector indices shape + ASSERT_RAISES(Invalid, SparseCSRIndex::Make(int32(), indptr_shape, {1, 2}, indptr_data, + indices_data, &si)); +} + template class TestSparseCOOTensorBase : public ::testing::Test { public: diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index f0a850c06910..c9cbfe258adf 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -75,6 +75,24 @@ static void ComputeColumnMajorStrides(const FixedWidthType& type, namespace { +inline bool IsTensorStridesRowMajor(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& strides) { + std::vector c_strides; + const auto& fw_type = checked_cast(*type); + ComputeRowMajorStrides(fw_type, shape, &c_strides); + return strides == c_strides; +} + +inline bool IsTensorStridesColumnMajor(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& strides) { + std::vector f_strides; + const auto& fw_type = checked_cast(*type); + ComputeColumnMajorStrides(fw_type, shape, &f_strides); + return strides == f_strides; +} + inline Status CheckTensorValidity(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape) { @@ -119,6 +137,13 @@ Status CheckTensorStridesValidity(const std::shared_ptr& data, namespace internal { +bool IsTensorStridesContiguous(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& strides) { + return IsTensorStridesRowMajor(type, shape, strides) || + IsTensorStridesColumnMajor(type, shape, strides); +} + Status ValidateTensorParameters(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, @@ -169,20 +194,16 @@ int64_t Tensor::size() const { return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies()); } -bool Tensor::is_contiguous() const { return is_row_major() || is_column_major(); } +bool Tensor::is_contiguous() const { + return internal::IsTensorStridesContiguous(type_, shape_, strides_); +} bool Tensor::is_row_major() const { - std::vector c_strides; - const auto& fw_type = checked_cast(*type_); - ComputeRowMajorStrides(fw_type, shape_, &c_strides); - return strides_ == c_strides; + return IsTensorStridesRowMajor(type_, shape_, strides_); } bool Tensor::is_column_major() const { - std::vector f_strides; - const auto& fw_type = checked_cast(*type_); - ComputeColumnMajorStrides(fw_type, shape_, &f_strides); - return strides_ == f_strides; + return IsTensorStridesColumnMajor(type_, shape_, strides_); } Type::type Tensor::type_id() const { return type_->id(); } diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index d3b2d9d81905..fed872595b27 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -57,13 +57,19 @@ class SparseTensorImpl; namespace internal { +ARROW_EXPORT +bool IsTensorStridesContiguous(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& strides); + +ARROW_EXPORT Status ValidateTensorParameters(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, const std::vector& dim_names); -} +} // namespace internal class ARROW_EXPORT Tensor { public: From 9cbe5d86381ec3fada0f83d6a1756e6e07bc1781 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Mon, 25 Nov 2019 14:58:56 +0900 Subject: [PATCH 08/19] Use Result --- cpp/src/arrow/ipc/read_write_test.cc | 9 +- cpp/src/arrow/ipc/reader.cc | 7 +- cpp/src/arrow/python/numpy_convert.cc | 4 +- cpp/src/arrow/sparse_tensor.cc | 50 +++++------ cpp/src/arrow/sparse_tensor.h | 111 +++++++++++++---------- cpp/src/arrow/sparse_tensor_test.cc | 125 ++++++++++++++------------ cpp/src/arrow/tensor.h | 66 ++++++-------- cpp/src/arrow/tensor_test.cc | 46 +++++----- 8 files changed, 210 insertions(+), 208 deletions(-) diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 370e997bcac2..06c9406c4032 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1288,7 +1288,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) { std::shared_ptr si; ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, {sizeof_index_value * 3, sizeof_index_value}, - Buffer::Wrap(coords_values), &si)); + Buffer::Wrap(coords_values)) + .Value(&si)); std::vector shape = {2, 3, 4}; std::vector dim_names = {"foo", "bar", "baz"}; @@ -1333,7 +1334,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) { std::shared_ptr si; ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, {sizeof_index_value, sizeof_index_value * 12}, - Buffer::Wrap(coords_values), &si)); + Buffer::Wrap(coords_values)) + .Value(&si)); std::vector shape = {2, 3, 4}; std::vector dim_names = {"foo", "bar", "baz"}; @@ -1358,7 +1360,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCSRIndex) { auto data = Buffer::Wrap(values); NumericTensor t(data, shape, {}, dim_names); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::Make(t, TypeTraits::type_singleton(), &st)); + ASSERT_OK( + SparseCSRMatrix::Make(t, TypeTraits::type_singleton()).Value(&st)); this->CheckSparseTensorRoundTrip(*st); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 3d991f63e499..9d240491ee4c 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1032,7 +1032,8 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata( sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type)); RETURN_NOT_OK(SparseCOOIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0], &sparse_index)); + payload.body_buffers[0]) + .Value(&sparse_index)); return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[1], out); @@ -1047,8 +1048,8 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, &indices_type)); ARROW_CHECK_EQ(indptr_type, indices_type); RETURN_NOT_OK(SparseCSRIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0], payload.body_buffers[1], - &sparse_index)); + payload.body_buffers[0], payload.body_buffers[1]) + .Value(&sparse_index)); return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[2], out); diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index dda1b8d534af..b397e27459ef 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -414,12 +414,12 @@ Status NdarraysToSparseCSRMatrix(MemoryPool* pool, PyObject* data_ao, PyObject* Status TensorToSparseCOOTensor(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCOOTensor::Make(*tensor, out); + return SparseCOOTensor::Make(*tensor).Value(out); } Status TensorToSparseCSRMatrix(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCSRMatrix::Make(*tensor, out); + return SparseCSRMatrix::Make(*tensor).Value(out); } } // namespace py diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 9a8c72cc77cc..a6cccc0bac95 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -515,11 +515,10 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t // ---------------------------------------------------------------------- // SparseCOOIndex -Status SparseCOOIndex::Make(const std::shared_ptr& indices_type, - const std::vector& indices_shape, - const std::vector& indices_strides, - std::shared_ptr indices_data, - std::shared_ptr* out) { +Result> SparseCOOIndex::Make( + const std::shared_ptr& indices_type, + const std::vector& indices_shape, + const std::vector& indices_strides, std::shared_ptr indices_data) { if (!is_integer(indices_type->id())) { return Status::Invalid("Type of SparseCOOIndex indices must be integer"); } @@ -530,21 +529,18 @@ Status SparseCOOIndex::Make(const std::shared_ptr& indices_type, indices_strides)) { return Status::Invalid("SparseCOOIndex indices must be contiguous"); } - *out = std::make_shared(std::make_shared( + return std::make_shared(std::make_shared( indices_type, indices_data, indices_shape, indices_strides)); - return Status::OK(); } -Status SparseCOOIndex::Make(const std::shared_ptr& indices_type, - const std::vector& shape, int64_t non_zero_length, - std::shared_ptr indices_data, - std::shared_ptr* out) { +Result> SparseCOOIndex::Make( + const std::shared_ptr& indices_type, const std::vector& shape, + int64_t non_zero_length, std::shared_ptr indices_data) { auto ndim = static_cast(shape.size()); const int64_t elsize = sizeof(indices_type.get()); std::vector indices_shape({non_zero_length, ndim}); std::vector indices_strides({elsize, elsize * non_zero_length}); - return SparseCOOIndex::Make(indices_type, indices_shape, indices_strides, indices_data, - out); + return SparseCOOIndex::Make(indices_type, indices_shape, indices_strides, indices_data); } // Constructor with a contiguous NumericTensor @@ -560,13 +556,11 @@ std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOInde // ---------------------------------------------------------------------- // SparseCSRIndex -Status SparseCSRIndex::Make(const std::shared_ptr& indptr_type, - const std::shared_ptr& indices_type, - const std::vector& indptr_shape, - const std::vector& indices_shape, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out) { +Result> SparseCSRIndex::Make( + const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, + const std::vector& indptr_shape, const std::vector& indices_shape, + std::shared_ptr indptr_data, std::shared_ptr indices_data) { if (!is_integer(indptr_type->id())) { return Status::Invalid("Type of SparseCSRIndex indptr must be integer"); } @@ -579,22 +573,20 @@ Status SparseCSRIndex::Make(const std::shared_ptr& indptr_type, if (indices_shape.size() != 1) { return Status::Invalid("SparseCSRIndex indices must be a vector"); } - *out = std::make_shared( + return std::make_shared( std::make_shared(indptr_type, indptr_data, indptr_shape), std::make_shared(indices_type, indices_data, indices_shape)); - return Status::OK(); } -Status SparseCSRIndex::Make(const std::shared_ptr& indptr_type, - const std::shared_ptr& indices_type, - const std::vector& shape, int64_t non_zero_length, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out) { +Result> SparseCSRIndex::Make( + const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& shape, + int64_t non_zero_length, std::shared_ptr indptr_data, + std::shared_ptr indices_data) { std::vector indptr_shape({shape[0] + 1}); std::vector indices_shape({non_zero_length}); return SparseCSRIndex::Make(indptr_type, indices_type, indptr_shape, indices_shape, - indptr_data, indices_data, out); + indptr_data, indices_data); } // Constructor with two index vectors diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index d131b552deaf..dad1c3ce46ce 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "arrow/tensor.h" @@ -91,17 +92,15 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase& indices_type, - const std::vector& indices_shape, - const std::vector& indices_strides, - std::shared_ptr indices_data, - std::shared_ptr* out); + static Result> Make( + const std::shared_ptr& indices_type, + const std::vector& indices_shape, + const std::vector& indices_strides, std::shared_ptr indices_data); /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data - static Status Make(const std::shared_ptr& indices_type, - const std::vector& shape, int64_t non_zero_length, - std::shared_ptr indices_data, - std::shared_ptr* out); + static Result> Make( + const std::shared_ptr& indices_type, const std::vector& shape, + int64_t non_zero_length, std::shared_ptr indices_data); /// \brief Construct SparseCOOIndex from column-major NumericTensor explicit SparseCOOIndex(const std::shared_ptr& coords); @@ -157,41 +156,35 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase& indptr_type, - const std::shared_ptr& indices_type, - const std::vector& indptr_shape, - const std::vector& indices_shape, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out); + static Result> Make( + const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, + const std::vector& indptr_shape, const std::vector& indices_shape, + std::shared_ptr indptr_data, std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from raw properties - static Status Make(const std::shared_ptr& indices_type, - const std::vector& indptr_shape, - const std::vector& indices_shape, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out) { + static Result> Make( + const std::shared_ptr& indices_type, + const std::vector& indptr_shape, const std::vector& indices_shape, + std::shared_ptr indptr_data, std::shared_ptr indices_data) { return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, - indices_data, out); + indices_data); } /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Status Make(const std::shared_ptr& indptr_type, - const std::shared_ptr& indices_type, - const std::vector& shape, int64_t non_zero_length, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out); + static Result> Make( + const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, const std::vector& shape, + int64_t non_zero_length, std::shared_ptr indptr_data, + std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Status Make(const std::shared_ptr& indices_type, - const std::vector& shape, int64_t non_zero_length, - std::shared_ptr indptr_data, - std::shared_ptr indices_data, - std::shared_ptr* out) { + static Result> Make( + const std::shared_ptr& indices_type, const std::vector& shape, + int64_t non_zero_length, std::shared_ptr indptr_data, + std::shared_ptr indices_data) { return Make(indices_type, indices_type, shape, non_zero_length, indptr_data, - indices_data, out); + indices_data); } /// \brief Construct SparseCSRIndex from two index vectors @@ -348,12 +341,10 @@ class SparseTensorImpl : public SparseTensor { : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {} /// \brief Create a SparseTensor with full parameters - static Status Make(const std::shared_ptr& sparse_index, - const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& dim_names, - std::shared_ptr>* out) { + static Result>> Make( + const std::shared_ptr& sparse_index, + const std::shared_ptr& type, const std::shared_ptr& data, + const std::vector& shape, const std::vector& dim_names) { if (!is_tensor_supported(type->id())) { return Status::Invalid(type->ToString(), " is not valid data type for a sparse tensor"); @@ -362,27 +353,51 @@ class SparseTensorImpl : public SparseTensor { if (dim_names.size() > 0 && dim_names.size() != shape.size()) { return Status::Invalid("dim_names length is inconsistent with shape"); } - *out = std::make_shared>(sparse_index, type, data, + return std::make_shared>(sparse_index, type, data, shape, dim_names); - return Status::OK(); } /// \brief Create a sparse tensor from a dense tensor /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. - static Status Make(const Tensor& tensor, - const std::shared_ptr& index_value_type, MemoryPool* pool, - std::shared_ptr>* out) { + static Result>> Make( + const Tensor& tensor, const std::shared_ptr& index_value_type, + MemoryPool* pool) { std::shared_ptr sparse_index; std::shared_ptr data; ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor( tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index, &data)); - *out = std::make_shared>( + return std::make_shared>( internal::checked_pointer_cast(sparse_index), tensor.type(), data, tensor.shape(), tensor.dim_names_); - return Status::OK(); + } + + static Result>> Make( + const Tensor& tensor, const std::shared_ptr& index_value_type) { + return Make(tensor, index_value_type, default_memory_pool()); + } + + static Result>> Make( + const Tensor& tensor, MemoryPool* pool) { + return Make(tensor, int64(), pool); + } + + static Result>> Make( + const Tensor& tensor) { + return Make(tensor, default_memory_pool()); + } + + /// \brief Create a sparse tensor from a dense tensor + /// + /// The dense tensor is re-encoded as a sparse index and a physical + /// data buffer for the non-zero value. + static Status Make(const Tensor& tensor, + const std::shared_ptr& index_value_type, MemoryPool* pool, + std::shared_ptr>* out) { + auto result = Make(tensor, index_value_type, pool); + return std::move(result).Value(out); } static Status Make(const Tensor& tensor, diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index e4faa390cde8..4091335e1307 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -54,21 +54,22 @@ TEST(TestSparseCOOIndex, Make) { std::vector shape = {12, 3}; std::vector stride = {3 * sizeof(int32_t), sizeof(int32_t)}; - std::shared_ptr si; - // OK - ASSERT_OK(SparseCOOIndex::Make(int32(), shape, stride, data, &si)); + auto res = SparseCOOIndex::Make(int32(), shape, stride, data); + ASSERT_OK(res); // Non-integer type - ASSERT_RAISES(Invalid, SparseCOOIndex::Make(float32(), shape, stride, data, &si)); + res = SparseCOOIndex::Make(float32(), shape, stride, data); + ASSERT_RAISES(Invalid, res); // Non-matrix indices - ASSERT_RAISES(Invalid, SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data, &si)); + res = SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data); + ASSERT_RAISES(Invalid, res); // Non-contiguous indices - ASSERT_RAISES(Invalid, SparseCOOIndex::Make(int32(), {6, 3}, - {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, - data, &si)); + res = SparseCOOIndex::Make(int32(), {6, 3}, {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, + data); + ASSERT_RAISES(Invalid, res); } TEST(TestSparseCSRIndex, Make) { @@ -79,23 +80,23 @@ TEST(TestSparseCSRIndex, Make) { std::vector indptr_shape = {7}; std::vector indices_shape = {12}; - std::shared_ptr si; - // OK - ASSERT_OK(SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, - indices_data, &si)); + auto res = SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, + indices_data); + ASSERT_OK(res); // Non-integer type - ASSERT_RAISES(Invalid, SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, - indptr_data, indices_data, &si)); + res = SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, indptr_data, + indices_data); + ASSERT_RAISES(Invalid, res); // Non-vector indptr shape - ASSERT_RAISES(Invalid, SparseCSRIndex::Make(int32(), {1, 2}, indices_shape, indptr_data, - indices_data, &si)); + res = SparseCSRIndex::Make(int32(), {1, 2}, indices_shape, indptr_data, indices_data); + ASSERT_RAISES(Invalid, res); // Non-vector indices shape - ASSERT_RAISES(Invalid, SparseCSRIndex::Make(int32(), indptr_shape, {1, 2}, indptr_data, - indices_data, &si)); + res = SparseCSRIndex::Make(int32(), indptr_shape, {1, 2}, indptr_data, indices_data); + ASSERT_RAISES(Invalid, res); } template @@ -122,9 +123,9 @@ class TestSparseCOOTensorBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK(SparseCOOTensor::Make(dense_tensor, - TypeTraits::type_singleton(), - &sparse_tensor_from_dense_)); + ASSERT_OK( + SparseCOOTensor::Make(dense_tensor, TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -188,7 +189,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNumericTensor1D) { NumericTensor dense_vector(dense_data, dense_shape); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(dense_vector, &st)); + ASSERT_OK(SparseCOOTensor::Make(dense_vector).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -214,7 +215,7 @@ TEST_F(TestSparseCOOTensor, CreationFromTensor) { Tensor tensor(int64(), buffer, this->shape_, {}, this->dim_names_); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor, &st)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -236,7 +237,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor, &st)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -254,9 +255,11 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { NumericTensor tensor1(buffer1, this->shape_); NumericTensor tensor2(buffer2, this->shape_); - std::shared_ptr st1, st2; - ASSERT_OK(SparseCOOTensor::Make(tensor1, &st1)); - ASSERT_OK(SparseCOOTensor::Make(tensor2, &st2)); + std::shared_ptr st1; + ASSERT_OK(SparseCOOTensor::Make(tensor1).Value(&st1)); + + std::shared_ptr st2; + ASSERT_OK(SparseCOOTensor::Make(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -270,7 +273,7 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCOOTensor::Make(tensor, &sparse_tensor)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&sparse_tensor)); ASSERT_EQ(5, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -328,27 +331,30 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { std::shared_ptr st; // OK - ASSERT_OK(SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, - this->dim_names_, &st)); + auto res = + SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, this->dim_names_); + ASSERT_OK(res); // OK with an empty dim_names - ASSERT_OK(SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}, &st)); + res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}); + ASSERT_OK(res); // invalid data type - ASSERT_RAISES(Invalid, - SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}, &st)); + res = SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}); + ASSERT_RAISES(Invalid, res); // negative items in shape - ASSERT_RAISES(Invalid, - SparseCOOTensor::Make(si, int64(), sparse_data, {2, -3, 4}, {}, &st)); + res = SparseCOOTensor::Make(si, int64(), sparse_data, {2, -3, 4}, {}); + ASSERT_RAISES(Invalid, res); // sparse index and ndim (shape length) are inconsistent - ASSERT_RAISES(Invalid, - SparseCOOTensor::Make(si, int64(), sparse_data, {6, 4}, {}, &st)); + res = SparseCOOTensor::Make(si, int64(), sparse_data, {6, 4}, {}); + ASSERT_RAISES(Invalid, res); // shape and dim_names are inconsistent - ASSERT_RAISES(Invalid, SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, - std::vector{"foo"}, &st)); + res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, + std::vector{"foo"}); + ASSERT_RAISES(Invalid, res); } TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, CreationWithRowMajorIndex) { @@ -479,9 +485,9 @@ class TestSparseCSRMatrixBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK(SparseCSRMatrix::Make(dense_tensor, - TypeTraits::type_singleton(), - &sparse_tensor_from_dense_)); + ASSERT_OK( + SparseCSRMatrix::Make(dense_tensor, TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -499,7 +505,8 @@ TEST_F(TestSparseCSRMatrix, CreationFromNumericTensor2D) { NumericTensor tensor(buffer, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCSRMatrix::Make(tensor, &st1)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st1)); + auto st2 = this->sparse_tensor_from_dense_; CheckSparseIndexFormatType(SparseTensorFormat::CSR, *st1); @@ -550,7 +557,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::Make(tensor, &st)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -592,8 +599,8 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1, st2; - ASSERT_OK(SparseCSRMatrix::Make(tensor1, &st1)); - ASSERT_OK(SparseCSRMatrix::Make(tensor2, &st2)); + ASSERT_OK(SparseCSRMatrix::Make(tensor1).Value(&st1)); + ASSERT_OK(SparseCSRMatrix::Make(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -607,7 +614,8 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCSRMatrix::Make(tensor, &sparse_tensor)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&sparse_tensor)); + ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -636,7 +644,8 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::shared_ptr si; ASSERT_OK(SparseCSRIndex::Make(TypeTraits::type_singleton(), indptr_shape, indices_shape, Buffer::Wrap(indptr_values), - Buffer::Wrap(indices_values), &si)); + Buffer::Wrap(indices_values)) + .Value(&si)); std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; auto sparse_data = Buffer::Wrap(sparse_values); @@ -644,33 +653,31 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::shared_ptr sm; // OK - ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, - this->dim_names_, &sm)); + ASSERT_OK( + SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, this->dim_names_)); // OK with an empty dim_names - ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, {}, &sm)); + ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, {})); // invalid data type ASSERT_RAISES(Invalid, - SparseCSRMatrix::Make(si, binary(), sparse_data, this->shape_, {}, &sm)); + SparseCSRMatrix::Make(si, binary(), sparse_data, this->shape_, {})); // empty shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {}, &sm)); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {})); // 1D shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {24}, {}, &sm)); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {24}, {})); // negative items in shape - ASSERT_RAISES(Invalid, - SparseCSRMatrix::Make(si, int64(), sparse_data, {6, -4}, {}, &sm)); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {6, -4}, {})); // sparse index and ndim (shape length) are inconsistent - ASSERT_RAISES(Invalid, - SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {}, &sm)); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {})); // shape and dim_names are inconsistent ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, - std::vector{"foo"}, &sm)); + std::vector{"foo"})); } REGISTER_TYPED_TEST_CASE_P(TestSparseCSRMatrixForIndexValueType, Make); diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index fed872595b27..cd77fb44b05c 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -25,6 +25,7 @@ #include "arrow/buffer.h" #include "arrow/compare.h" +#include "arrow/result.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/macros.h" @@ -84,17 +85,14 @@ class ARROW_EXPORT Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names, - std::shared_ptr* out) { + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names) { ARROW_RETURN_NOT_OK( internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); - *out = std::make_shared(type, data, shape, strides, dim_names); - return Status::OK(); + return std::make_shared(type, data, shape, strides, dim_names); } /// \brief Create a Tensor with full parameters with empty dim_names @@ -106,12 +104,11 @@ class ARROW_EXPORT Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, std::shared_ptr* out) { - return Make(type, data, shape, strides, {}, out); + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides) { + return Make(type, data, shape, strides, {}); } /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed @@ -123,11 +120,10 @@ class ARROW_EXPORT Tensor { /// \param[in] type The data type of the tensor values /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, std::shared_ptr* out) { - return Make(type, data, shape, {}, {}, out); + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape) { + return Make(type, data, shape, {}, {}); } virtual ~Tensor() = default; @@ -235,16 +231,12 @@ class NumericTensor : public Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names, - std::shared_ptr>* out) { + static Result>> Make( + const std::shared_ptr& data, const std::vector& shape, + const std::vector& strides, const std::vector& dim_names) { ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( TypeTraits::type_singleton(), data, shape, strides, dim_names)); - *out = std::make_shared>(data, shape, strides, dim_names); - return Status::OK(); + return std::make_shared>(data, shape, strides, dim_names); } /// \brief Create a NumericTensor with full parameters with empty dim_names @@ -255,12 +247,10 @@ class NumericTensor : public Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - std::shared_ptr>* out) { - return Make(data, shape, strides, {}, out); + static Result>> Make( + const std::shared_ptr& data, const std::vector& shape, + const std::vector& strides) { + return Make(data, shape, strides, {}); } /// \brief Create a NumericTensor with full parameters with empty strides and empty @@ -271,11 +261,9 @@ class NumericTensor : public Tensor { /// /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - /// \param[out] out The result tensor - static Status Make(const std::shared_ptr& data, - const std::vector& shape, - std::shared_ptr>* out) { - return Make(data, shape, {}, {}, out); + static Result>> Make( + const std::shared_ptr& data, const std::vector& shape) { + return Make(data, shape, {}, {}); } /// Constructor with non-negative strides and dimension names diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 0374911a19d7..945eabde18dc 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -44,24 +44,24 @@ TEST(TestTensor, Make) { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr tensor1; - ASSERT_OK(Tensor::Make(float64(), data, shape, &tensor1)); + ASSERT_OK(Tensor::Make(float64(), data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr tensor2; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, &tensor2)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr tensor3; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names, &tensor3)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr tensor4; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names, &tensor4)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); @@ -71,40 +71,35 @@ TEST(TestTensor, MakeFailureCases) { std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); - std::shared_ptr tensor1; // null type - ASSERT_RAISES(Invalid, Tensor::Make(nullptr, data, shape, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(nullptr, data, shape)); // invalid type - ASSERT_RAISES(Invalid, Tensor::Make(binary(), data, shape, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(binary(), data, shape)); // null data - ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape)); // empty shape - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {}, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {})); // negative items in shape - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6}, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6})); // invalid stride length - ASSERT_RAISES(Invalid, - Tensor::Make(float64(), data, shape, {sizeof(double)}, &tensor1)); - ASSERT_RAISES(Invalid, - Tensor::Make(float64(), data, shape, - {sizeof(double), sizeof(double), sizeof(double)}, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double), sizeof(double), sizeof(double)})); // invalid stride values to involve buffer over run - ASSERT_RAISES(Invalid, - Tensor::Make(float64(), data, shape, - {sizeof(double) * 6, sizeof(double) * 2}, &tensor1)); ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, - {sizeof(double) * 12, sizeof(double)}, &tensor1)); + {sizeof(double) * 6, sizeof(double) * 2})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double) * 12, sizeof(double)})); // too many dim_names are supplied - ASSERT_RAISES( - Invalid, Tensor::Make(float64(), data, shape, {}, {"foo", "bar", "baz"}, &tensor1)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {}, {"foo", "bar", "baz"})); } TEST(TestTensor, ZeroDim) { @@ -433,24 +428,25 @@ TEST(TestNumericTensor, Make) { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr> tensor1; - ASSERT_OK(NumericTensor::Make(data, shape, &tensor1)); + ASSERT_OK(NumericTensor::Make(data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr> tensor2; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides, &tensor2)); + ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr> tensor3; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names, &tensor3)); + ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr> tensor4; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides, dim_names, &tensor4)); + ASSERT_OK( + NumericTensor::Make(data, {3, 6}, strides, dim_names).Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); From aa8017b9bce242f4a562df6cec1b53c273abe8f3 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 26 Nov 2019 13:02:09 +0900 Subject: [PATCH 09/19] Rname to MakeSafe --- cpp/src/arrow/ipc/read_write_test.cc | 17 ++--- cpp/src/arrow/ipc/reader.cc | 9 +-- cpp/src/arrow/python/numpy_convert.cc | 4 +- cpp/src/arrow/sparse_tensor.cc | 14 ++-- cpp/src/arrow/sparse_tensor.h | 42 +++++++----- cpp/src/arrow/sparse_tensor_test.cc | 99 ++++++++++++++------------- cpp/src/arrow/tensor.h | 37 +++++----- cpp/src/arrow/tensor_test.cc | 54 ++++++++------- 8 files changed, 144 insertions(+), 132 deletions(-) diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 06c9406c4032..676230e9f643 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1286,9 +1286,9 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) { 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, - {sizeof_index_value * 3, sizeof_index_value}, - Buffer::Wrap(coords_values)) + ASSERT_OK(SparseCOOIndex::MakeSafe( + TypeTraits::type_singleton(), {12, 3}, + {sizeof_index_value * 3, sizeof_index_value}, Buffer::Wrap(coords_values)) .Value(&si)); std::vector shape = {2, 3, 4}; @@ -1332,9 +1332,10 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) { 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, - {sizeof_index_value, sizeof_index_value * 12}, - Buffer::Wrap(coords_values)) + ASSERT_OK(SparseCOOIndex::MakeSafe(TypeTraits::type_singleton(), + {12, 3}, + {sizeof_index_value, sizeof_index_value * 12}, + Buffer::Wrap(coords_values)) .Value(&si)); std::vector shape = {2, 3, 4}; @@ -1360,8 +1361,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCSRIndex) { auto data = Buffer::Wrap(values); NumericTensor t(data, shape, {}, dim_names); std::shared_ptr st; - ASSERT_OK( - SparseCSRMatrix::Make(t, TypeTraits::type_singleton()).Value(&st)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(t, TypeTraits::type_singleton()) + .Value(&st)); this->CheckSparseTensorRoundTrip(*st); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 9d240491ee4c..a5cdf996448f 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1031,8 +1031,8 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, std::shared_ptr indices_type; RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata( sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type)); - RETURN_NOT_OK(SparseCOOIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0]) + RETURN_NOT_OK(SparseCOOIndex::MakeSafe(indices_type, shape, non_zero_length, + payload.body_buffers[0]) .Value(&sparse_index)); return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[1], @@ -1047,8 +1047,9 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(), &indptr_type, &indices_type)); ARROW_CHECK_EQ(indptr_type, indices_type); - RETURN_NOT_OK(SparseCSRIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0], payload.body_buffers[1]) + RETURN_NOT_OK(SparseCSRIndex::MakeSafe(indices_type, shape, non_zero_length, + payload.body_buffers[0], + payload.body_buffers[1]) .Value(&sparse_index)); return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[2], diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index b397e27459ef..a049760915f3 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -414,12 +414,12 @@ Status NdarraysToSparseCSRMatrix(MemoryPool* pool, PyObject* data_ao, PyObject* Status TensorToSparseCOOTensor(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCOOTensor::Make(*tensor).Value(out); + return SparseCOOTensor::MakeSafe(*tensor).Value(out); } Status TensorToSparseCSRMatrix(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCSRMatrix::Make(*tensor).Value(out); + return SparseCSRMatrix::MakeSafe(*tensor).Value(out); } } // namespace py diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index a6cccc0bac95..dd582aa1f098 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -515,7 +515,7 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t // ---------------------------------------------------------------------- // SparseCOOIndex -Result> SparseCOOIndex::Make( +Result> SparseCOOIndex::MakeSafe( const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data) { @@ -533,14 +533,14 @@ Result> SparseCOOIndex::Make( indices_type, indices_data, indices_shape, indices_strides)); } -Result> SparseCOOIndex::Make( +Result> SparseCOOIndex::MakeSafe( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data) { auto ndim = static_cast(shape.size()); const int64_t elsize = sizeof(indices_type.get()); std::vector indices_shape({non_zero_length, ndim}); std::vector indices_strides({elsize, elsize * non_zero_length}); - return SparseCOOIndex::Make(indices_type, indices_shape, indices_strides, indices_data); + return MakeSafe(indices_type, indices_shape, indices_strides, indices_data); } // Constructor with a contiguous NumericTensor @@ -556,7 +556,7 @@ std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOInde // ---------------------------------------------------------------------- // SparseCSRIndex -Result> SparseCSRIndex::Make( +Result> SparseCSRIndex::MakeSafe( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, @@ -578,15 +578,15 @@ Result> SparseCSRIndex::Make( std::make_shared(indices_type, indices_data, indices_shape)); } -Result> SparseCSRIndex::Make( +Result> SparseCSRIndex::MakeSafe( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data) { std::vector indptr_shape({shape[0] + 1}); std::vector indices_shape({non_zero_length}); - return SparseCSRIndex::Make(indptr_type, indices_type, indptr_shape, indices_shape, - indptr_data, indices_data); + return MakeSafe(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data, + indices_data); } // Constructor with two index vectors diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index dad1c3ce46ce..36fa4dc2ba1d 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -92,13 +92,13 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase> Make( + static Result> MakeSafe( const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data); /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data - static Result> Make( + static Result> MakeSafe( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data); @@ -156,35 +156,35 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase> Make( + static Result> MakeSafe( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from raw properties - static Result> Make( + static Result> MakeSafe( const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data) { - return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, - indices_data); + return MakeSafe(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, + indices_data); } /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Result> Make( + static Result> MakeSafe( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Result> Make( + static Result> MakeSafe( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data) { - return Make(indices_type, indices_type, shape, non_zero_length, indptr_data, - indices_data); + return MakeSafe(indices_type, indices_type, shape, non_zero_length, indptr_data, + indices_data); } /// \brief Construct SparseCSRIndex from two index vectors @@ -341,7 +341,7 @@ class SparseTensorImpl : public SparseTensor { : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {} /// \brief Create a SparseTensor with full parameters - static Result>> Make( + static Result>> MakeSafe( const std::shared_ptr& sparse_index, const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& dim_names) { @@ -361,7 +361,7 @@ class SparseTensorImpl : public SparseTensor { /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. - static Result>> Make( + static Result>> MakeSafe( const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool) { std::shared_ptr sparse_index; @@ -374,43 +374,47 @@ class SparseTensorImpl : public SparseTensor { data, tensor.shape(), tensor.dim_names_); } - static Result>> Make( + static Result>> MakeSafe( const Tensor& tensor, const std::shared_ptr& index_value_type) { - return Make(tensor, index_value_type, default_memory_pool()); + return MakeSafe(tensor, index_value_type, default_memory_pool()); } - static Result>> Make( + static Result>> MakeSafe( const Tensor& tensor, MemoryPool* pool) { - return Make(tensor, int64(), pool); + return MakeSafe(tensor, int64(), pool); } - static Result>> Make( + static Result>> MakeSafe( const Tensor& tensor) { - return Make(tensor, default_memory_pool()); + return MakeSafe(tensor, default_memory_pool()); } /// \brief Create a sparse tensor from a dense tensor /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. + ARROW_DEPRECATED("Use MakeSafe") static Status Make(const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool, std::shared_ptr>* out) { - auto result = Make(tensor, index_value_type, pool); + auto result = MakeSafe(tensor, index_value_type, pool); return std::move(result).Value(out); } + ARROW_DEPRECATED("Use MakeSafe") static Status Make(const Tensor& tensor, const std::shared_ptr& index_value_type, std::shared_ptr>* out) { return Make(tensor, index_value_type, default_memory_pool(), out); } + ARROW_DEPRECATED("Use MakeSafe") static Status Make(const Tensor& tensor, MemoryPool* pool, std::shared_ptr>* out) { return Make(tensor, int64(), pool, out); } + ARROW_DEPRECATED("Use MakeSafe") static Status Make(const Tensor& tensor, std::shared_ptr>* out) { return Make(tensor, default_memory_pool(), out); diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 4091335e1307..14003057973d 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -55,20 +55,20 @@ TEST(TestSparseCOOIndex, Make) { std::vector stride = {3 * sizeof(int32_t), sizeof(int32_t)}; // OK - auto res = SparseCOOIndex::Make(int32(), shape, stride, data); + auto res = SparseCOOIndex::MakeSafe(int32(), shape, stride, data); ASSERT_OK(res); // Non-integer type - res = SparseCOOIndex::Make(float32(), shape, stride, data); + res = SparseCOOIndex::MakeSafe(float32(), shape, stride, data); ASSERT_RAISES(Invalid, res); // Non-matrix indices - res = SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data); + res = SparseCOOIndex::MakeSafe(int32(), {4, 3, 4}, stride, data); ASSERT_RAISES(Invalid, res); // Non-contiguous indices - res = SparseCOOIndex::Make(int32(), {6, 3}, {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, - data); + res = SparseCOOIndex::MakeSafe(int32(), {6, 3}, + {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, data); ASSERT_RAISES(Invalid, res); } @@ -81,21 +81,23 @@ TEST(TestSparseCSRIndex, Make) { std::vector indices_shape = {12}; // OK - auto res = SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, - indices_data); + auto res = SparseCSRIndex::MakeSafe(int32(), indptr_shape, indices_shape, indptr_data, + indices_data); ASSERT_OK(res); // Non-integer type - res = SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, indptr_data, - indices_data); + res = SparseCSRIndex::MakeSafe(float32(), indptr_shape, indices_shape, indptr_data, + indices_data); ASSERT_RAISES(Invalid, res); // Non-vector indptr shape - res = SparseCSRIndex::Make(int32(), {1, 2}, indices_shape, indptr_data, indices_data); + res = + SparseCSRIndex::MakeSafe(int32(), {1, 2}, indices_shape, indptr_data, indices_data); ASSERT_RAISES(Invalid, res); // Non-vector indices shape - res = SparseCSRIndex::Make(int32(), indptr_shape, {1, 2}, indptr_data, indices_data); + res = + SparseCSRIndex::MakeSafe(int32(), indptr_shape, {1, 2}, indptr_data, indices_data); ASSERT_RAISES(Invalid, res); } @@ -123,9 +125,9 @@ class TestSparseCOOTensorBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK( - SparseCOOTensor::Make(dense_tensor, TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK(SparseCOOTensor::MakeSafe(dense_tensor, + TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -189,7 +191,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNumericTensor1D) { NumericTensor dense_vector(dense_data, dense_shape); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(dense_vector).Value(&st)); + ASSERT_OK(SparseCOOTensor::MakeSafe(dense_vector).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -215,7 +217,7 @@ TEST_F(TestSparseCOOTensor, CreationFromTensor) { Tensor tensor(int64(), buffer, this->shape_, {}, this->dim_names_); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); + ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -237,7 +239,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); + ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -256,10 +258,10 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCOOTensor::Make(tensor1).Value(&st1)); + ASSERT_OK(SparseCOOTensor::MakeSafe(tensor1).Value(&st1)); std::shared_ptr st2; - ASSERT_OK(SparseCOOTensor::Make(tensor2).Value(&st2)); + ASSERT_OK(SparseCOOTensor::MakeSafe(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -273,7 +275,7 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&sparse_tensor)); + ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&sparse_tensor)); ASSERT_EQ(5, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -332,28 +334,28 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { // OK auto res = - SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, this->dim_names_); + SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, this->dim_names_); ASSERT_OK(res); // OK with an empty dim_names - res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}); + res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, {}); ASSERT_OK(res); // invalid data type - res = SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}); + res = SparseCOOTensor::MakeSafe(si, binary(), sparse_data, this->shape_, {}); ASSERT_RAISES(Invalid, res); // negative items in shape - res = SparseCOOTensor::Make(si, int64(), sparse_data, {2, -3, 4}, {}); + res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, {2, -3, 4}, {}); ASSERT_RAISES(Invalid, res); // sparse index and ndim (shape length) are inconsistent - res = SparseCOOTensor::Make(si, int64(), sparse_data, {6, 4}, {}); + res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, {6, 4}, {}); ASSERT_RAISES(Invalid, res); // shape and dim_names are inconsistent - res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, - std::vector{"foo"}); + res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, + std::vector{"foo"}); ASSERT_RAISES(Invalid, res); } @@ -485,9 +487,9 @@ class TestSparseCSRMatrixBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK( - SparseCSRMatrix::Make(dense_tensor, TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(dense_tensor, + TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -505,7 +507,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNumericTensor2D) { NumericTensor tensor(buffer, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st1)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&st1)); auto st2 = this->sparse_tensor_from_dense_; @@ -557,7 +559,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -599,8 +601,8 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1, st2; - ASSERT_OK(SparseCSRMatrix::Make(tensor1).Value(&st1)); - ASSERT_OK(SparseCSRMatrix::Make(tensor2).Value(&st2)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor1).Value(&st1)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -614,7 +616,7 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&sparse_tensor)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&sparse_tensor)); ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -642,9 +644,9 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::vector indices_shape = {12}; std::shared_ptr si; - ASSERT_OK(SparseCSRIndex::Make(TypeTraits::type_singleton(), - indptr_shape, indices_shape, Buffer::Wrap(indptr_values), - Buffer::Wrap(indices_values)) + ASSERT_OK(SparseCSRIndex::MakeSafe( + TypeTraits::type_singleton(), indptr_shape, indices_shape, + Buffer::Wrap(indptr_values), Buffer::Wrap(indices_values)) .Value(&si)); std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; @@ -653,31 +655,32 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::shared_ptr sm; // OK - ASSERT_OK( - SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, this->dim_names_)); + ASSERT_OK(SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, + this->dim_names_)); // OK with an empty dim_names - ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, {})); + ASSERT_OK(SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, {})); // invalid data type ASSERT_RAISES(Invalid, - SparseCSRMatrix::Make(si, binary(), sparse_data, this->shape_, {})); + SparseCSRMatrix::MakeSafe(si, binary(), sparse_data, this->shape_, {})); // empty shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {}, {})); // 1D shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {24}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {24}, {})); // negative items in shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {6, -4}, {})); + ASSERT_RAISES(Invalid, + SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {6, -4}, {})); // sparse index and ndim (shape length) are inconsistent - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {4, 6}, {})); // shape and dim_names are inconsistent - ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, - std::vector{"foo"})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, + std::vector{"foo"})); } REGISTER_TYPED_TEST_CASE_P(TestSparseCSRMatrixForIndexValueType, Make); diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index cd77fb44b05c..28db99a02c77 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -85,11 +85,10 @@ class ARROW_EXPORT Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names) { + static Result> MakeSafe( + const std::shared_ptr& type, const std::shared_ptr& data, + const std::vector& shape, const std::vector& strides, + const std::vector& dim_names) { ARROW_RETURN_NOT_OK( internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); return std::make_shared(type, data, shape, strides, dim_names); @@ -104,11 +103,11 @@ class ARROW_EXPORT Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides) { - return Make(type, data, shape, strides, {}); + static Result> MakeSafe(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides) { + return MakeSafe(type, data, shape, strides, {}); } /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed @@ -120,10 +119,10 @@ class ARROW_EXPORT Tensor { /// \param[in] type The data type of the tensor values /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape) { - return Make(type, data, shape, {}, {}); + static Result> MakeSafe(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape) { + return MakeSafe(type, data, shape, {}, {}); } virtual ~Tensor() = default; @@ -231,7 +230,7 @@ class NumericTensor : public Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - static Result>> Make( + static Result>> MakeSafe( const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, const std::vector& dim_names) { ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( @@ -247,10 +246,10 @@ class NumericTensor : public Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - static Result>> Make( + static Result>> MakeSafe( const std::shared_ptr& data, const std::vector& shape, const std::vector& strides) { - return Make(data, shape, strides, {}); + return MakeSafe(data, shape, strides, {}); } /// \brief Create a NumericTensor with full parameters with empty strides and empty @@ -261,9 +260,9 @@ class NumericTensor : public Tensor { /// /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - static Result>> Make( + static Result>> MakeSafe( const std::shared_ptr& data, const std::vector& shape) { - return Make(data, shape, {}, {}); + return MakeSafe(data, shape, {}, {}); } /// Constructor with non-negative strides and dimension names diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 945eabde18dc..5a27aabb56d0 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -38,68 +38,71 @@ void AssertCountNonZero(const Tensor& t, int64_t expected) { ASSERT_EQ(count, expected); } -TEST(TestTensor, Make) { +TEST(TestTensor, MakeSafe) { // without strides and dim_names std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr tensor1; - ASSERT_OK(Tensor::Make(float64(), data, shape).Value(&tensor1)); + ASSERT_OK(Tensor::MakeSafe(float64(), data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr tensor2; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK(Tensor::MakeSafe(float64(), data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr tensor3; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK(Tensor::MakeSafe(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr tensor4; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); + ASSERT_OK( + Tensor::MakeSafe(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); } -TEST(TestTensor, MakeFailureCases) { +TEST(TestTensor, MakeSafeFailureCases) { std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); // null type - ASSERT_RAISES(Invalid, Tensor::Make(nullptr, data, shape)); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(nullptr, data, shape)); // invalid type - ASSERT_RAISES(Invalid, Tensor::Make(binary(), data, shape)); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(binary(), data, shape)); // null data - ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape)); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), nullptr, shape)); // empty shape - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {})); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, {})); // negative items in shape - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6})); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, {-3, 6})); // invalid stride length - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {sizeof(double)})); - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, - {sizeof(double), sizeof(double), sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, {sizeof(double)})); + ASSERT_RAISES(Invalid, + Tensor::MakeSafe(float64(), data, shape, + {sizeof(double), sizeof(double), sizeof(double)})); // invalid stride values to involve buffer over run - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, - {sizeof(double) * 6, sizeof(double) * 2})); - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, - {sizeof(double) * 12, sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, + {sizeof(double) * 6, sizeof(double) * 2})); + ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, + {sizeof(double) * 12, sizeof(double)})); // too many dim_names are supplied - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {}, {"foo", "bar", "baz"})); + ASSERT_RAISES(Invalid, + Tensor::MakeSafe(float64(), data, shape, {}, {"foo", "bar", "baz"})); } TEST(TestTensor, ZeroDim) { @@ -422,31 +425,32 @@ REGISTER_TYPED_TEST_CASE_P(TestFloatTensor, Equals); INSTANTIATE_TYPED_TEST_CASE_P(Float32, TestFloatTensor, FloatType); INSTANTIATE_TYPED_TEST_CASE_P(Float64, TestFloatTensor, DoubleType); -TEST(TestNumericTensor, Make) { +TEST(TestNumericTensor, MakeSafe) { // without strides and dim_names std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr> tensor1; - ASSERT_OK(NumericTensor::Make(data, shape).Value(&tensor1)); + ASSERT_OK(NumericTensor::MakeSafe(data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr> tensor2; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK(NumericTensor::MakeSafe(data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr> tensor3; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK( + NumericTensor::MakeSafe(data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr> tensor4; - ASSERT_OK( - NumericTensor::Make(data, {3, 6}, strides, dim_names).Value(&tensor4)); + ASSERT_OK(NumericTensor::MakeSafe(data, {3, 6}, strides, dim_names) + .Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); From 0c69d600d56390b2088833762b75086032da9a01 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 26 Nov 2019 16:19:36 +0900 Subject: [PATCH 10/19] Refactoring --- cpp/src/arrow/sparse_tensor.cc | 63 +++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index dd582aa1f098..e15e42450067 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -515,20 +515,31 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t // ---------------------------------------------------------------------- // SparseCOOIndex -Result> SparseCOOIndex::MakeSafe( - const std::shared_ptr& indices_type, - const std::vector& indices_shape, - const std::vector& indices_strides, std::shared_ptr indices_data) { - if (!is_integer(indices_type->id())) { +namespace { + +inline Status CheckSparseCOOIndexValidity(const std::shared_ptr& type, + const std::vector& shape, + const std::vector& strides) { + if (!is_integer(type->id())) { return Status::Invalid("Type of SparseCOOIndex indices must be integer"); } - if (indices_shape.size() != 2) { + if (shape.size() != 2) { return Status::Invalid("SparseCOOIndex indices must be a matrix"); } - if (!internal::IsTensorStridesContiguous(indices_type, indices_shape, - indices_strides)) { + if (!internal::IsTensorStridesContiguous(type, shape, strides)) { return Status::Invalid("SparseCOOIndex indices must be contiguous"); } + return Status::OK(); +} + +} // namespace + +Result> SparseCOOIndex::MakeSafe( + const std::shared_ptr& indices_type, + const std::vector& indices_shape, + const std::vector& indices_strides, std::shared_ptr indices_data) { + RETURN_NOT_OK( + CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides)); return std::make_shared(std::make_shared( indices_type, indices_data, indices_shape, indices_strides)); } @@ -546,9 +557,9 @@ Result> SparseCOOIndex::MakeSafe( // Constructor with a contiguous NumericTensor SparseCOOIndex::SparseCOOIndex(const std::shared_ptr& coords) : SparseIndexBase(coords->shape()[0]), coords_(coords) { - ARROW_CHECK(is_integer(coords_->type_id())); - ARROW_CHECK(coords_->is_contiguous()); - ARROW_CHECK_EQ(2, coords_->ndim()); + ARROW_CHECK( + CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()) + .ok()); } std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); } @@ -556,11 +567,12 @@ std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOInde // ---------------------------------------------------------------------- // SparseCSRIndex -Result> SparseCSRIndex::MakeSafe( - const std::shared_ptr& indptr_type, - const std::shared_ptr& indices_type, - const std::vector& indptr_shape, const std::vector& indices_shape, - std::shared_ptr indptr_data, std::shared_ptr indices_data) { +namespace { + +inline Status CheckSparseCSRIndexValidity(const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, + const std::vector& indptr_shape, + const std::vector& indices_shape) { if (!is_integer(indptr_type->id())) { return Status::Invalid("Type of SparseCSRIndex indptr must be integer"); } @@ -573,6 +585,18 @@ Result> SparseCSRIndex::MakeSafe( if (indices_shape.size() != 1) { return Status::Invalid("SparseCSRIndex indices must be a vector"); } + return Status::OK(); +} + +} // namespace + +Result> SparseCSRIndex::MakeSafe( + const std::shared_ptr& indptr_type, + const std::shared_ptr& indices_type, + const std::vector& indptr_shape, const std::vector& indices_shape, + std::shared_ptr indptr_data, std::shared_ptr indices_data) { + RETURN_NOT_OK(CheckSparseCSRIndexValidity(indptr_type, indices_type, indptr_shape, + indices_shape)); return std::make_shared( std::make_shared(indptr_type, indptr_data, indptr_shape), std::make_shared(indices_type, indices_data, indices_shape)); @@ -593,10 +617,9 @@ Result> SparseCSRIndex::MakeSafe( SparseCSRIndex::SparseCSRIndex(const std::shared_ptr& indptr, const std::shared_ptr& indices) : SparseIndexBase(indices->shape()[0]), indptr_(indptr), indices_(indices) { - ARROW_CHECK(is_integer(indptr_->type_id())); - ARROW_CHECK_EQ(1, indptr_->ndim()); - ARROW_CHECK(is_integer(indices_->type_id())); - ARROW_CHECK_EQ(1, indices_->ndim()); + ARROW_CHECK(CheckSparseCSRIndexValidity(indptr_->type(), indices_->type(), + indptr_->shape(), indices_->shape()) + .ok()); } std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); } From 03ce831fcf4aabbe552e6f8515a9ce08fcd5f67f Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 27 Nov 2019 18:16:59 +0900 Subject: [PATCH 11/19] Fix error messages --- cpp/src/arrow/sparse_tensor.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 36fa4dc2ba1d..235a2dfa89c8 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -209,7 +209,7 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase 2) { @@ -220,8 +220,7 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase Date: Thu, 28 Nov 2019 09:30:13 +0900 Subject: [PATCH 12/19] Rename MakeSafe to Make --- cpp/src/arrow/ipc/read_write_test.cc | 17 +++-- cpp/src/arrow/ipc/reader.cc | 9 ++- cpp/src/arrow/python/numpy_convert.cc | 4 +- cpp/src/arrow/sparse_tensor.cc | 14 ++-- cpp/src/arrow/sparse_tensor.h | 46 ++++++------- cpp/src/arrow/sparse_tensor_test.cc | 99 +++++++++++++-------------- cpp/src/arrow/tensor.h | 37 +++++----- cpp/src/arrow/tensor_test.cc | 54 +++++++-------- 8 files changed, 136 insertions(+), 144 deletions(-) diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 676230e9f643..06c9406c4032 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1286,9 +1286,9 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) { 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::MakeSafe( - TypeTraits::type_singleton(), {12, 3}, - {sizeof_index_value * 3, sizeof_index_value}, Buffer::Wrap(coords_values)) + ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, + {sizeof_index_value * 3, sizeof_index_value}, + Buffer::Wrap(coords_values)) .Value(&si)); std::vector shape = {2, 3, 4}; @@ -1332,10 +1332,9 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) { 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::MakeSafe(TypeTraits::type_singleton(), - {12, 3}, - {sizeof_index_value, sizeof_index_value * 12}, - Buffer::Wrap(coords_values)) + ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, + {sizeof_index_value, sizeof_index_value * 12}, + Buffer::Wrap(coords_values)) .Value(&si)); std::vector shape = {2, 3, 4}; @@ -1361,8 +1360,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCSRIndex) { auto data = Buffer::Wrap(values); NumericTensor t(data, shape, {}, dim_names); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::MakeSafe(t, TypeTraits::type_singleton()) - .Value(&st)); + ASSERT_OK( + SparseCSRMatrix::Make(t, TypeTraits::type_singleton()).Value(&st)); this->CheckSparseTensorRoundTrip(*st); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index a5cdf996448f..9d240491ee4c 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1031,8 +1031,8 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, std::shared_ptr indices_type; RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata( sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type)); - RETURN_NOT_OK(SparseCOOIndex::MakeSafe(indices_type, shape, non_zero_length, - payload.body_buffers[0]) + RETURN_NOT_OK(SparseCOOIndex::Make(indices_type, shape, non_zero_length, + payload.body_buffers[0]) .Value(&sparse_index)); return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[1], @@ -1047,9 +1047,8 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(), &indptr_type, &indices_type)); ARROW_CHECK_EQ(indptr_type, indices_type); - RETURN_NOT_OK(SparseCSRIndex::MakeSafe(indices_type, shape, non_zero_length, - payload.body_buffers[0], - payload.body_buffers[1]) + RETURN_NOT_OK(SparseCSRIndex::Make(indices_type, shape, non_zero_length, + payload.body_buffers[0], payload.body_buffers[1]) .Value(&sparse_index)); return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[2], diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index a049760915f3..b397e27459ef 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -414,12 +414,12 @@ Status NdarraysToSparseCSRMatrix(MemoryPool* pool, PyObject* data_ao, PyObject* Status TensorToSparseCOOTensor(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCOOTensor::MakeSafe(*tensor).Value(out); + return SparseCOOTensor::Make(*tensor).Value(out); } Status TensorToSparseCSRMatrix(const std::shared_ptr& tensor, std::shared_ptr* out) { - return SparseCSRMatrix::MakeSafe(*tensor).Value(out); + return SparseCSRMatrix::Make(*tensor).Value(out); } } // namespace py diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index e15e42450067..ce6eb02c9632 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -534,7 +534,7 @@ inline Status CheckSparseCOOIndexValidity(const std::shared_ptr& type, } // namespace -Result> SparseCOOIndex::MakeSafe( +Result> SparseCOOIndex::Make( const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data) { @@ -544,14 +544,14 @@ Result> SparseCOOIndex::MakeSafe( indices_type, indices_data, indices_shape, indices_strides)); } -Result> SparseCOOIndex::MakeSafe( +Result> SparseCOOIndex::Make( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data) { auto ndim = static_cast(shape.size()); const int64_t elsize = sizeof(indices_type.get()); std::vector indices_shape({non_zero_length, ndim}); std::vector indices_strides({elsize, elsize * non_zero_length}); - return MakeSafe(indices_type, indices_shape, indices_strides, indices_data); + return Make(indices_type, indices_shape, indices_strides, indices_data); } // Constructor with a contiguous NumericTensor @@ -590,7 +590,7 @@ inline Status CheckSparseCSRIndexValidity(const std::shared_ptr& indpt } // namespace -Result> SparseCSRIndex::MakeSafe( +Result> SparseCSRIndex::Make( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, @@ -602,15 +602,15 @@ Result> SparseCSRIndex::MakeSafe( std::make_shared(indices_type, indices_data, indices_shape)); } -Result> SparseCSRIndex::MakeSafe( +Result> SparseCSRIndex::Make( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data) { std::vector indptr_shape({shape[0] + 1}); std::vector indices_shape({non_zero_length}); - return MakeSafe(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data, - indices_data); + return Make(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data, + indices_data); } // Constructor with two index vectors diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 235a2dfa89c8..e1bb1c0fe934 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -92,13 +92,13 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBase> MakeSafe( + static Result> Make( const std::shared_ptr& indices_type, const std::vector& indices_shape, const std::vector& indices_strides, std::shared_ptr indices_data); /// \brief Make SparseCOOIndex from sparse tensor's shape properties and data - static Result> MakeSafe( + static Result> Make( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indices_data); @@ -156,35 +156,35 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBase> MakeSafe( + static Result> Make( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from raw properties - static Result> MakeSafe( + static Result> Make( const std::shared_ptr& indices_type, const std::vector& indptr_shape, const std::vector& indices_shape, std::shared_ptr indptr_data, std::shared_ptr indices_data) { - return MakeSafe(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, - indices_data); + return Make(indices_type, indices_type, indptr_shape, indices_shape, indptr_data, + indices_data); } /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Result> MakeSafe( + static Result> Make( const std::shared_ptr& indptr_type, const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data); /// \brief Make SparseCSRIndex from sparse tensor's shape properties and data - static Result> MakeSafe( + static Result> Make( const std::shared_ptr& indices_type, const std::vector& shape, int64_t non_zero_length, std::shared_ptr indptr_data, std::shared_ptr indices_data) { - return MakeSafe(indices_type, indices_type, shape, non_zero_length, indptr_data, - indices_data); + return Make(indices_type, indices_type, shape, non_zero_length, indptr_data, + indices_data); } /// \brief Construct SparseCSRIndex from two index vectors @@ -340,7 +340,7 @@ class SparseTensorImpl : public SparseTensor { : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {} /// \brief Create a SparseTensor with full parameters - static Result>> MakeSafe( + static Result>> Make( const std::shared_ptr& sparse_index, const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& dim_names) { @@ -360,7 +360,7 @@ class SparseTensorImpl : public SparseTensor { /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. - static Result>> MakeSafe( + static Result>> Make( const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool) { std::shared_ptr sparse_index; @@ -373,47 +373,47 @@ class SparseTensorImpl : public SparseTensor { data, tensor.shape(), tensor.dim_names_); } - static Result>> MakeSafe( + static Result>> Make( const Tensor& tensor, const std::shared_ptr& index_value_type) { - return MakeSafe(tensor, index_value_type, default_memory_pool()); + return Make(tensor, index_value_type, default_memory_pool()); } - static Result>> MakeSafe( + static Result>> Make( const Tensor& tensor, MemoryPool* pool) { - return MakeSafe(tensor, int64(), pool); + return Make(tensor, int64(), pool); } - static Result>> MakeSafe( + static Result>> Make( const Tensor& tensor) { - return MakeSafe(tensor, default_memory_pool()); + return Make(tensor, default_memory_pool()); } /// \brief Create a sparse tensor from a dense tensor /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. - ARROW_DEPRECATED("Use MakeSafe") + ARROW_DEPRECATED("Use Result-returning version") static Status Make(const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool, std::shared_ptr>* out) { - auto result = MakeSafe(tensor, index_value_type, pool); + auto result = Make(tensor, index_value_type, pool); return std::move(result).Value(out); } - ARROW_DEPRECATED("Use MakeSafe") + ARROW_DEPRECATED("Use Result-returning version") static Status Make(const Tensor& tensor, const std::shared_ptr& index_value_type, std::shared_ptr>* out) { return Make(tensor, index_value_type, default_memory_pool(), out); } - ARROW_DEPRECATED("Use MakeSafe") + ARROW_DEPRECATED("Use Result-returning version") static Status Make(const Tensor& tensor, MemoryPool* pool, std::shared_ptr>* out) { return Make(tensor, int64(), pool, out); } - ARROW_DEPRECATED("Use MakeSafe") + ARROW_DEPRECATED("Use Result-returning version") static Status Make(const Tensor& tensor, std::shared_ptr>* out) { return Make(tensor, default_memory_pool(), out); diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 14003057973d..4091335e1307 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -55,20 +55,20 @@ TEST(TestSparseCOOIndex, Make) { std::vector stride = {3 * sizeof(int32_t), sizeof(int32_t)}; // OK - auto res = SparseCOOIndex::MakeSafe(int32(), shape, stride, data); + auto res = SparseCOOIndex::Make(int32(), shape, stride, data); ASSERT_OK(res); // Non-integer type - res = SparseCOOIndex::MakeSafe(float32(), shape, stride, data); + res = SparseCOOIndex::Make(float32(), shape, stride, data); ASSERT_RAISES(Invalid, res); // Non-matrix indices - res = SparseCOOIndex::MakeSafe(int32(), {4, 3, 4}, stride, data); + res = SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data); ASSERT_RAISES(Invalid, res); // Non-contiguous indices - res = SparseCOOIndex::MakeSafe(int32(), {6, 3}, - {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, data); + res = SparseCOOIndex::Make(int32(), {6, 3}, {6 * sizeof(int32_t), 2 * sizeof(int32_t)}, + data); ASSERT_RAISES(Invalid, res); } @@ -81,23 +81,21 @@ TEST(TestSparseCSRIndex, Make) { std::vector indices_shape = {12}; // OK - auto res = SparseCSRIndex::MakeSafe(int32(), indptr_shape, indices_shape, indptr_data, - indices_data); + auto res = SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, + indices_data); ASSERT_OK(res); // Non-integer type - res = SparseCSRIndex::MakeSafe(float32(), indptr_shape, indices_shape, indptr_data, - indices_data); + res = SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, indptr_data, + indices_data); ASSERT_RAISES(Invalid, res); // Non-vector indptr shape - res = - SparseCSRIndex::MakeSafe(int32(), {1, 2}, indices_shape, indptr_data, indices_data); + res = SparseCSRIndex::Make(int32(), {1, 2}, indices_shape, indptr_data, indices_data); ASSERT_RAISES(Invalid, res); // Non-vector indices shape - res = - SparseCSRIndex::MakeSafe(int32(), indptr_shape, {1, 2}, indptr_data, indices_data); + res = SparseCSRIndex::Make(int32(), indptr_shape, {1, 2}, indptr_data, indices_data); ASSERT_RAISES(Invalid, res); } @@ -125,9 +123,9 @@ class TestSparseCOOTensorBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK(SparseCOOTensor::MakeSafe(dense_tensor, - TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK( + SparseCOOTensor::Make(dense_tensor, TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -191,7 +189,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNumericTensor1D) { NumericTensor dense_vector(dense_data, dense_shape); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::MakeSafe(dense_vector).Value(&st)); + ASSERT_OK(SparseCOOTensor::Make(dense_vector).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -217,7 +215,7 @@ TEST_F(TestSparseCOOTensor, CreationFromTensor) { Tensor tensor(int64(), buffer, this->shape_, {}, this->dim_names_); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&st)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -239,7 +237,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&st)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -258,10 +256,10 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCOOTensor::MakeSafe(tensor1).Value(&st1)); + ASSERT_OK(SparseCOOTensor::Make(tensor1).Value(&st1)); std::shared_ptr st2; - ASSERT_OK(SparseCOOTensor::MakeSafe(tensor2).Value(&st2)); + ASSERT_OK(SparseCOOTensor::Make(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -275,7 +273,7 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCOOTensor::MakeSafe(tensor).Value(&sparse_tensor)); + ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&sparse_tensor)); ASSERT_EQ(5, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -334,28 +332,28 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { // OK auto res = - SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, this->dim_names_); + SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, this->dim_names_); ASSERT_OK(res); // OK with an empty dim_names - res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, {}); + res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}); ASSERT_OK(res); // invalid data type - res = SparseCOOTensor::MakeSafe(si, binary(), sparse_data, this->shape_, {}); + res = SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}); ASSERT_RAISES(Invalid, res); // negative items in shape - res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, {2, -3, 4}, {}); + res = SparseCOOTensor::Make(si, int64(), sparse_data, {2, -3, 4}, {}); ASSERT_RAISES(Invalid, res); // sparse index and ndim (shape length) are inconsistent - res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, {6, 4}, {}); + res = SparseCOOTensor::Make(si, int64(), sparse_data, {6, 4}, {}); ASSERT_RAISES(Invalid, res); // shape and dim_names are inconsistent - res = SparseCOOTensor::MakeSafe(si, int64(), sparse_data, this->shape_, - std::vector{"foo"}); + res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, + std::vector{"foo"}); ASSERT_RAISES(Invalid, res); } @@ -487,9 +485,9 @@ class TestSparseCSRMatrixBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK(SparseCSRMatrix::MakeSafe(dense_tensor, - TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK( + SparseCSRMatrix::Make(dense_tensor, TypeTraits::type_singleton()) + .Value(&sparse_tensor_from_dense_)); } protected: @@ -507,7 +505,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNumericTensor2D) { NumericTensor tensor(buffer, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&st1)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st1)); auto st2 = this->sparse_tensor_from_dense_; @@ -559,7 +557,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&st)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -601,8 +599,8 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1, st2; - ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor1).Value(&st1)); - ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor2).Value(&st2)); + ASSERT_OK(SparseCSRMatrix::Make(tensor1).Value(&st1)); + ASSERT_OK(SparseCSRMatrix::Make(tensor2).Value(&st2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -616,7 +614,7 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCSRMatrix::MakeSafe(tensor).Value(&sparse_tensor)); + ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&sparse_tensor)); ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -644,9 +642,9 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::vector indices_shape = {12}; std::shared_ptr si; - ASSERT_OK(SparseCSRIndex::MakeSafe( - TypeTraits::type_singleton(), indptr_shape, indices_shape, - Buffer::Wrap(indptr_values), Buffer::Wrap(indices_values)) + ASSERT_OK(SparseCSRIndex::Make(TypeTraits::type_singleton(), + indptr_shape, indices_shape, Buffer::Wrap(indptr_values), + Buffer::Wrap(indices_values)) .Value(&si)); std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; @@ -655,32 +653,31 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::shared_ptr sm; // OK - ASSERT_OK(SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, - this->dim_names_)); + ASSERT_OK( + SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, this->dim_names_)); // OK with an empty dim_names - ASSERT_OK(SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, {})); + ASSERT_OK(SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, {})); // invalid data type ASSERT_RAISES(Invalid, - SparseCSRMatrix::MakeSafe(si, binary(), sparse_data, this->shape_, {})); + SparseCSRMatrix::Make(si, binary(), sparse_data, this->shape_, {})); // empty shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {}, {})); // 1D shape - ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {24}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {24}, {})); // negative items in shape - ASSERT_RAISES(Invalid, - SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {6, -4}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {6, -4}, {})); // sparse index and ndim (shape length) are inconsistent - ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, {4, 6}, {})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, {4, 6}, {})); // shape and dim_names are inconsistent - ASSERT_RAISES(Invalid, SparseCSRMatrix::MakeSafe(si, int64(), sparse_data, this->shape_, - std::vector{"foo"})); + ASSERT_RAISES(Invalid, SparseCSRMatrix::Make(si, int64(), sparse_data, this->shape_, + std::vector{"foo"})); } REGISTER_TYPED_TEST_CASE_P(TestSparseCSRMatrixForIndexValueType, Make); diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 28db99a02c77..cd77fb44b05c 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -85,10 +85,11 @@ class ARROW_EXPORT Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - static Result> MakeSafe( - const std::shared_ptr& type, const std::shared_ptr& data, - const std::vector& shape, const std::vector& strides, - const std::vector& dim_names) { + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names) { ARROW_RETURN_NOT_OK( internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); return std::make_shared(type, data, shape, strides, dim_names); @@ -103,11 +104,11 @@ class ARROW_EXPORT Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - static Result> MakeSafe(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides) { - return MakeSafe(type, data, shape, strides, {}); + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides) { + return Make(type, data, shape, strides, {}); } /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed @@ -119,10 +120,10 @@ class ARROW_EXPORT Tensor { /// \param[in] type The data type of the tensor values /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - static Result> MakeSafe(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape) { - return MakeSafe(type, data, shape, {}, {}); + static Result> Make(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape) { + return Make(type, data, shape, {}, {}); } virtual ~Tensor() = default; @@ -230,7 +231,7 @@ class NumericTensor : public Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - static Result>> MakeSafe( + static Result>> Make( const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, const std::vector& dim_names) { ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( @@ -246,10 +247,10 @@ class NumericTensor : public Tensor { /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor /// \param[in] strides The strides of the tensor - static Result>> MakeSafe( + static Result>> Make( const std::shared_ptr& data, const std::vector& shape, const std::vector& strides) { - return MakeSafe(data, shape, strides, {}); + return Make(data, shape, strides, {}); } /// \brief Create a NumericTensor with full parameters with empty strides and empty @@ -260,9 +261,9 @@ class NumericTensor : public Tensor { /// /// \param[in] data The buffer of the tensor content /// \param[in] shape The shape of the tensor - static Result>> MakeSafe( + static Result>> Make( const std::shared_ptr& data, const std::vector& shape) { - return MakeSafe(data, shape, {}, {}); + return Make(data, shape, {}, {}); } /// Constructor with non-negative strides and dimension names diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 5a27aabb56d0..945eabde18dc 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -38,71 +38,68 @@ void AssertCountNonZero(const Tensor& t, int64_t expected) { ASSERT_EQ(count, expected); } -TEST(TestTensor, MakeSafe) { +TEST(TestTensor, Make) { // without strides and dim_names std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr tensor1; - ASSERT_OK(Tensor::MakeSafe(float64(), data, shape).Value(&tensor1)); + ASSERT_OK(Tensor::Make(float64(), data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr tensor2; - ASSERT_OK(Tensor::MakeSafe(float64(), data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr tensor3; - ASSERT_OK(Tensor::MakeSafe(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr tensor4; - ASSERT_OK( - Tensor::MakeSafe(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); + ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); } -TEST(TestTensor, MakeSafeFailureCases) { +TEST(TestTensor, MakeFailureCases) { std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); // null type - ASSERT_RAISES(Invalid, Tensor::MakeSafe(nullptr, data, shape)); + ASSERT_RAISES(Invalid, Tensor::Make(nullptr, data, shape)); // invalid type - ASSERT_RAISES(Invalid, Tensor::MakeSafe(binary(), data, shape)); + ASSERT_RAISES(Invalid, Tensor::Make(binary(), data, shape)); // null data - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), nullptr, shape)); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape)); // empty shape - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, {})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {})); // negative items in shape - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, {-3, 6})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6})); // invalid stride length - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, {sizeof(double)})); - ASSERT_RAISES(Invalid, - Tensor::MakeSafe(float64(), data, shape, - {sizeof(double), sizeof(double), sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double), sizeof(double), sizeof(double)})); // invalid stride values to involve buffer over run - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, - {sizeof(double) * 6, sizeof(double) * 2})); - ASSERT_RAISES(Invalid, Tensor::MakeSafe(float64(), data, shape, - {sizeof(double) * 12, sizeof(double)})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double) * 6, sizeof(double) * 2})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, + {sizeof(double) * 12, sizeof(double)})); // too many dim_names are supplied - ASSERT_RAISES(Invalid, - Tensor::MakeSafe(float64(), data, shape, {}, {"foo", "bar", "baz"})); + ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, shape, {}, {"foo", "bar", "baz"})); } TEST(TestTensor, ZeroDim) { @@ -425,32 +422,31 @@ REGISTER_TYPED_TEST_CASE_P(TestFloatTensor, Equals); INSTANTIATE_TYPED_TEST_CASE_P(Float32, TestFloatTensor, FloatType); INSTANTIATE_TYPED_TEST_CASE_P(Float64, TestFloatTensor, DoubleType); -TEST(TestNumericTensor, MakeSafe) { +TEST(TestNumericTensor, Make) { // without strides and dim_names std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); std::shared_ptr> tensor1; - ASSERT_OK(NumericTensor::MakeSafe(data, shape).Value(&tensor1)); + ASSERT_OK(NumericTensor::Make(data, shape).Value(&tensor1)); // without dim_names std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr> tensor2; - ASSERT_OK(NumericTensor::MakeSafe(data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides).Value(&tensor2)); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr> tensor3; - ASSERT_OK( - NumericTensor::MakeSafe(data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names).Value(&tensor3)); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr> tensor4; - ASSERT_OK(NumericTensor::MakeSafe(data, {3, 6}, strides, dim_names) - .Value(&tensor4)); + ASSERT_OK( + NumericTensor::Make(data, {3, 6}, strides, dim_names).Value(&tensor4)); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); From 32ef7922d25aee9af6297f0290a1a689e6dcb11a Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 09:14:43 +0900 Subject: [PATCH 13/19] Use appropriate macros --- cpp/src/arrow/ipc/read_write_test.cc | 20 ++++++------- cpp/src/arrow/ipc/reader.cc | 13 +++++---- cpp/src/arrow/sparse_tensor.cc | 5 ++-- cpp/src/arrow/sparse_tensor_test.cc | 42 ++++++++++++++-------------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 06c9406c4032..083e66a7abcd 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1286,10 +1286,10 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) { 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, - {sizeof_index_value * 3, sizeof_index_value}, - Buffer::Wrap(coords_values)) - .Value(&si)); + ASSERT_OK_AND_ASSIGN( + si, SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, + {sizeof_index_value * 3, sizeof_index_value}, + Buffer::Wrap(coords_values))); std::vector shape = {2, 3, 4}; std::vector dim_names = {"foo", "bar", "baz"}; @@ -1332,10 +1332,10 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) { 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}; const int sizeof_index_value = sizeof(c_index_value_type); std::shared_ptr si; - ASSERT_OK(SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, - {sizeof_index_value, sizeof_index_value * 12}, - Buffer::Wrap(coords_values)) - .Value(&si)); + ASSERT_OK_AND_ASSIGN( + si, SparseCOOIndex::Make(TypeTraits::type_singleton(), {12, 3}, + {sizeof_index_value, sizeof_index_value * 12}, + Buffer::Wrap(coords_values))); std::vector shape = {2, 3, 4}; std::vector dim_names = {"foo", "bar", "baz"}; @@ -1360,8 +1360,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCSRIndex) { auto data = Buffer::Wrap(values); NumericTensor t(data, shape, {}, dim_names); std::shared_ptr st; - ASSERT_OK( - SparseCSRMatrix::Make(t, TypeTraits::type_singleton()).Value(&st)); + ASSERT_OK_AND_ASSIGN( + st, SparseCSRMatrix::Make(t, TypeTraits::type_singleton())); this->CheckSparseTensorRoundTrip(*st); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 9d240491ee4c..c89e75eb35b3 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1031,9 +1031,9 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, std::shared_ptr indices_type; RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata( sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type)); - RETURN_NOT_OK(SparseCOOIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0]) - .Value(&sparse_index)); + ARROW_ASSIGN_OR_RAISE(sparse_index, + SparseCOOIndex::Make(indices_type, shape, non_zero_length, + payload.body_buffers[0])); return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[1], out); @@ -1047,9 +1047,10 @@ Status ReadSparseTensorPayload(const IpcPayload& payload, sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(), &indptr_type, &indices_type)); ARROW_CHECK_EQ(indptr_type, indices_type); - RETURN_NOT_OK(SparseCSRIndex::Make(indices_type, shape, non_zero_length, - payload.body_buffers[0], payload.body_buffers[1]) - .Value(&sparse_index)); + ARROW_ASSIGN_OR_RAISE( + sparse_index, + SparseCSRIndex::Make(indices_type, shape, non_zero_length, + payload.body_buffers[0], payload.body_buffers[1])); return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index, non_zero_length, payload.body_buffers[2], out); diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index ce6eb02c9632..6a9a120a74a4 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -557,9 +557,8 @@ Result> SparseCOOIndex::Make( // Constructor with a contiguous NumericTensor SparseCOOIndex::SparseCOOIndex(const std::shared_ptr& coords) : SparseIndexBase(coords->shape()[0]), coords_(coords) { - ARROW_CHECK( - CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()) - .ok()); + ARROW_CHECK_OK( + CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides())); } std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); } diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 4091335e1307..2e1406b9fca5 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -123,9 +123,9 @@ class TestSparseCOOTensorBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK( - SparseCOOTensor::Make(dense_tensor, TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_, + SparseCOOTensor::Make( + dense_tensor, TypeTraits::type_singleton())); } protected: @@ -189,7 +189,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNumericTensor1D) { NumericTensor dense_vector(dense_data, dense_shape); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(dense_vector).Value(&st)); + ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(dense_vector)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -215,7 +215,7 @@ TEST_F(TestSparseCOOTensor, CreationFromTensor) { Tensor tensor(int64(), buffer, this->shape_, {}, this->dim_names_); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); + ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -237,7 +237,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&st)); + ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -256,10 +256,10 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCOOTensor::Make(tensor1).Value(&st1)); + ASSERT_OK_AND_ASSIGN(st1, SparseCOOTensor::Make(tensor1)); std::shared_ptr st2; - ASSERT_OK(SparseCOOTensor::Make(tensor2).Value(&st2)); + ASSERT_OK_AND_ASSIGN(st2, SparseCOOTensor::Make(tensor2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -273,7 +273,7 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCOOTensor::Make(tensor).Value(&sparse_tensor)); + ASSERT_OK_AND_ASSIGN(sparse_tensor, SparseCOOTensor::Make(tensor)); ASSERT_EQ(5, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -485,9 +485,9 @@ class TestSparseCSRMatrixBase : public ::testing::Test { 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; auto dense_data = Buffer::Wrap(dense_values); NumericTensor dense_tensor(dense_data, shape_, {}, dim_names_); - ASSERT_OK( - SparseCSRMatrix::Make(dense_tensor, TypeTraits::type_singleton()) - .Value(&sparse_tensor_from_dense_)); + ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_, + SparseCSRMatrix::Make( + dense_tensor, TypeTraits::type_singleton())); } protected: @@ -505,7 +505,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNumericTensor2D) { NumericTensor tensor(buffer, this->shape_); std::shared_ptr st1; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st1)); + ASSERT_OK_AND_ASSIGN(st1, SparseCSRMatrix::Make(tensor)); auto st2 = this->sparse_tensor_from_dense_; @@ -557,7 +557,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { Tensor tensor(int64(), buffer, this->shape_, strides); std::shared_ptr st; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&st)); + ASSERT_OK_AND_ASSIGN(st, SparseCSRMatrix::Make(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -599,8 +599,8 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) { NumericTensor tensor2(buffer2, this->shape_); std::shared_ptr st1, st2; - ASSERT_OK(SparseCSRMatrix::Make(tensor1).Value(&st1)); - ASSERT_OK(SparseCSRMatrix::Make(tensor2).Value(&st2)); + ASSERT_OK_AND_ASSIGN(st1, SparseCSRMatrix::Make(tensor1)); + ASSERT_OK_AND_ASSIGN(st2, SparseCSRMatrix::Make(tensor2)); ASSERT_TRUE(st1->Equals(*this->sparse_tensor_from_dense_)); ASSERT_FALSE(st1->Equals(*st2)); @@ -614,7 +614,7 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); std::shared_ptr sparse_tensor; - ASSERT_OK(SparseCSRMatrix::Make(tensor).Value(&sparse_tensor)); + ASSERT_OK_AND_ASSIGN(sparse_tensor, SparseCSRMatrix::Make(tensor)); ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -642,10 +642,10 @@ TYPED_TEST_P(TestSparseCSRMatrixForIndexValueType, Make) { std::vector indices_shape = {12}; std::shared_ptr si; - ASSERT_OK(SparseCSRIndex::Make(TypeTraits::type_singleton(), - indptr_shape, indices_shape, Buffer::Wrap(indptr_values), - Buffer::Wrap(indices_values)) - .Value(&si)); + ASSERT_OK_AND_ASSIGN( + si, SparseCSRIndex::Make(TypeTraits::type_singleton(), indptr_shape, + indices_shape, Buffer::Wrap(indptr_values), + Buffer::Wrap(indices_values))); std::vector sparse_values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}; auto sparse_data = Buffer::Wrap(sparse_values); From 4801c3f500aded07df0f45612938d07ce1883545 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 09:37:14 +0900 Subject: [PATCH 14/19] Make ValidateShape inline --- cpp/src/arrow/sparse_tensor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index e1bb1c0fe934..d15f1467ca08 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -121,7 +121,7 @@ class ARROW_EXPORT SparseCOOIndex : public internal::SparseIndexBaseEquals(*other.indices()); } - Status ValidateShape(const std::vector& shape) const override { + inline Status ValidateShape(const std::vector& shape) const override { ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape)); if (static_cast(coords_->shape()[1]) == shape.size()) { @@ -205,7 +205,7 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBaseEquals(*other.indptr()) && indices()->Equals(*other.indices()); } - Status ValidateShape(const std::vector& shape) const override { + inline Status ValidateShape(const std::vector& shape) const override { ARROW_RETURN_NOT_OK(SparseIndex::ValidateShape(shape)); if (shape.size() < 2) { From 29913bdaae480a729be8b68f82b744c084adfa5c Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 09:37:38 +0900 Subject: [PATCH 15/19] Remove a needless condition --- cpp/src/arrow/sparse_tensor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index d15f1467ca08..9e0bb3ed1963 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -216,7 +216,7 @@ class ARROW_EXPORT SparseCSRIndex : public internal::SparseIndexBaseshape()[0] == shape[0] + 1) { + if (indptr_->shape()[0] == shape[0] + 1) { return Status::OK(); } From 195cd92801d237dac1ffc06a1321437d2acacd5e Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 10:30:28 +0900 Subject: [PATCH 16/19] Fix SparseTensorImpl::Make --- cpp/src/arrow/sparse_tensor.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 9e0bb3ed1963..5194f2fec3ab 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -340,7 +340,7 @@ class SparseTensorImpl : public SparseTensor { : SparseTensorImpl(NULLPTR, type, NULLPTR, shape, dim_names) {} /// \brief Create a SparseTensor with full parameters - static Result>> Make( + static inline Result>> Make( const std::shared_ptr& sparse_index, const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& dim_names) { @@ -360,9 +360,9 @@ class SparseTensorImpl : public SparseTensor { /// /// The dense tensor is re-encoded as a sparse index and a physical /// data buffer for the non-zero value. - static Result>> Make( + static inline Result>> Make( const Tensor& tensor, const std::shared_ptr& index_value_type, - MemoryPool* pool) { + MemoryPool* pool = default_memory_pool()) { std::shared_ptr sparse_index; std::shared_ptr data; ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor( @@ -373,21 +373,11 @@ class SparseTensorImpl : public SparseTensor { data, tensor.shape(), tensor.dim_names_); } - static Result>> Make( - const Tensor& tensor, const std::shared_ptr& index_value_type) { - return Make(tensor, index_value_type, default_memory_pool()); - } - - static Result>> Make( - const Tensor& tensor, MemoryPool* pool) { + static inline Result>> Make( + const Tensor& tensor, MemoryPool* pool = default_memory_pool()) { return Make(tensor, int64(), pool); } - static Result>> Make( - const Tensor& tensor) { - return Make(tensor, default_memory_pool()); - } - /// \brief Create a sparse tensor from a dense tensor /// /// The dense tensor is re-encoded as a sparse index and a physical From 6130d252bb483309465a304083dcf0a195bf118f Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 11:54:35 +0900 Subject: [PATCH 17/19] Simplify Tensor::Make and NumericTensor::Make declarations --- cpp/src/arrow/tensor.h | 70 ++++-------------------------------------- 1 file changed, 6 insertions(+), 64 deletions(-) diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index cd77fb44b05c..aa7c1919cc25 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -85,47 +85,15 @@ class ARROW_EXPORT Tensor { /// \param[in] strides The strides of the tensor /// (if this is empty, the data assumed to be row-major) /// \param[in] dim_names The names of the tensor dimensions - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides, - const std::vector& dim_names) { + static inline Result> Make( + const std::shared_ptr& type, const std::shared_ptr& data, + const std::vector& shape, const std::vector& strides = {}, + const std::vector& dim_names = {}) { ARROW_RETURN_NOT_OK( internal::ValidateTensorParameters(type, data, shape, strides, dim_names)); return std::make_shared(type, data, shape, strides, dim_names); } - /// \brief Create a Tensor with full parameters with empty dim_names - /// - /// This factory function will return Status::Invalid when the parameters are - /// inconsistent - /// - /// \param[in] type The data type of the tensor values - /// \param[in] data The buffer of the tensor content - /// \param[in] shape The shape of the tensor - /// \param[in] strides The strides of the tensor - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape, - const std::vector& strides) { - return Make(type, data, shape, strides, {}); - } - - /// \brief Create a Tensor with full parameters with empty dim_names, the data assumed - /// to be row-major - /// - /// This factory function will return Status::Invalid when the - /// parameters are inconsistent - /// - /// \param[in] type The data type of the tensor values - /// \param[in] data The buffer of the tensor content - /// \param[in] shape The shape of the tensor - static Result> Make(const std::shared_ptr& type, - const std::shared_ptr& data, - const std::vector& shape) { - return Make(type, data, shape, {}, {}); - } - virtual ~Tensor() = default; /// Constructor with no dimension names or strides, data assumed to be row-major @@ -233,39 +201,13 @@ class NumericTensor : public Tensor { /// \param[in] dim_names The names of the tensor dimensions static Result>> Make( const std::shared_ptr& data, const std::vector& shape, - const std::vector& strides, const std::vector& dim_names) { + const std::vector& strides = {}, + const std::vector& dim_names = {}) { ARROW_RETURN_NOT_OK(internal::ValidateTensorParameters( TypeTraits::type_singleton(), data, shape, strides, dim_names)); return std::make_shared>(data, shape, strides, dim_names); } - /// \brief Create a NumericTensor with full parameters with empty dim_names - /// - /// This factory function will return Status::Invalid when the parameters are - /// inconsistent - /// - /// \param[in] data The buffer of the tensor content - /// \param[in] shape The shape of the tensor - /// \param[in] strides The strides of the tensor - static Result>> Make( - const std::shared_ptr& data, const std::vector& shape, - const std::vector& strides) { - return Make(data, shape, strides, {}); - } - - /// \brief Create a NumericTensor with full parameters with empty strides and empty - /// dim_names, the data assumed to be row-major - /// - /// This factory function will return Status::Invalid when the parameters are - /// inconsistent - /// - /// \param[in] data The buffer of the tensor content - /// \param[in] shape The shape of the tensor - static Result>> Make( - const std::shared_ptr& data, const std::vector& shape) { - return Make(data, shape, {}, {}); - } - /// Constructor with non-negative strides and dimension names NumericTensor(const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, From e03f1f70a125cd78d97800179e509da44c00f24a Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 12:19:17 +0900 Subject: [PATCH 18/19] Fix tests of Tensor and SparseTensor more appropriately --- cpp/src/arrow/sparse_tensor_test.cc | 50 ++++++++++++++------- cpp/src/arrow/tensor_test.cc | 69 +++++++++++++++++++++++------ 2 files changed, 90 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 2e1406b9fca5..3cc5e8824334 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -52,18 +52,21 @@ TEST(TestSparseCOOIndex, Make) { 1, 0, 1, 1, 0, 3, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3}; auto data = Buffer::Wrap(values); std::vector shape = {12, 3}; - std::vector stride = {3 * sizeof(int32_t), sizeof(int32_t)}; + std::vector strides = {3 * sizeof(int32_t), sizeof(int32_t)}; // OK - auto res = SparseCOOIndex::Make(int32(), shape, stride, data); - ASSERT_OK(res); + std::shared_ptr si; + ASSERT_OK_AND_ASSIGN(si, SparseCOOIndex::Make(int32(), shape, strides, data)); + ASSERT_EQ(shape, si->indices()->shape()); + ASSERT_EQ(strides, si->indices()->strides()); + ASSERT_EQ(data->data(), si->indices()->raw_data()); // Non-integer type - res = SparseCOOIndex::Make(float32(), shape, stride, data); + auto res = SparseCOOIndex::Make(float32(), shape, strides, data); ASSERT_RAISES(Invalid, res); // Non-matrix indices - res = SparseCOOIndex::Make(int32(), {4, 3, 4}, stride, data); + res = SparseCOOIndex::Make(int32(), {4, 3, 4}, strides, data); ASSERT_RAISES(Invalid, res); // Non-contiguous indices @@ -81,13 +84,17 @@ TEST(TestSparseCSRIndex, Make) { std::vector indices_shape = {12}; // OK - auto res = SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, indptr_data, - indices_data); - ASSERT_OK(res); + std::shared_ptr si; + ASSERT_OK_AND_ASSIGN(si, SparseCSRIndex::Make(int32(), indptr_shape, indices_shape, + indptr_data, indices_data)); + ASSERT_EQ(indptr_shape, si->indptr()->shape()); + ASSERT_EQ(indptr_data->data(), si->indptr()->raw_data()); + ASSERT_EQ(indices_shape, si->indices()->shape()); + ASSERT_EQ(indices_data->data(), si->indices()->raw_data()); // Non-integer type - res = SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, indptr_data, - indices_data); + auto res = SparseCSRIndex::Make(float32(), indptr_shape, indices_shape, indptr_data, + indices_data); ASSERT_RAISES(Invalid, res); // Non-vector indptr shape @@ -331,16 +338,27 @@ TYPED_TEST_P(TestSparseCOOTensorForIndexValueType, Make) { std::shared_ptr st; // OK - auto res = - SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, this->dim_names_); - ASSERT_OK(res); + ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, + this->dim_names_)); + ASSERT_EQ(int64(), st->type()); + ASSERT_EQ(this->shape_, st->shape()); + ASSERT_EQ(this->dim_names_, st->dim_names()); + ASSERT_EQ(sparse_data->data(), st->raw_data()); + ASSERT_TRUE( + internal::checked_pointer_cast(st->sparse_index())->Equals(*si)); // OK with an empty dim_names - res = SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {}); - ASSERT_OK(res); + ASSERT_OK_AND_ASSIGN(st, + SparseCOOTensor::Make(si, int64(), sparse_data, this->shape_, {})); + ASSERT_EQ(int64(), st->type()); + ASSERT_EQ(this->shape_, st->shape()); + ASSERT_EQ(std::vector{}, st->dim_names()); + ASSERT_EQ(sparse_data->data(), st->raw_data()); + ASSERT_TRUE( + internal::checked_pointer_cast(st->sparse_index())->Equals(*si)); // invalid data type - res = SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}); + auto res = SparseCOOTensor::Make(si, binary(), sparse_data, this->shape_, {}); ASSERT_RAISES(Invalid, res); // negative items in shape diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 945eabde18dc..2d91765874e9 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -39,29 +39,50 @@ void AssertCountNonZero(const Tensor& t, int64_t expected) { } TEST(TestTensor, Make) { - // without strides and dim_names std::vector shape = {3, 6}; + std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); + + // without strides and dim_names std::shared_ptr tensor1; - ASSERT_OK(Tensor::Make(float64(), data, shape).Value(&tensor1)); + ASSERT_OK_AND_ASSIGN(tensor1, Tensor::Make(float64(), data, shape)); + EXPECT_EQ(float64(), tensor1->type()); + EXPECT_EQ(shape, tensor1->shape()); + EXPECT_EQ(strides, tensor1->strides()); + EXPECT_EQ(std::vector{}, tensor1->dim_names()); + EXPECT_EQ(data->data(), tensor1->raw_data()); // without dim_names - std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr tensor2; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK_AND_ASSIGN(tensor2, Tensor::Make(float64(), data, shape, strides)); + EXPECT_EQ(float64(), tensor2->type()); + EXPECT_EQ(shape, tensor2->shape()); + EXPECT_EQ(strides, tensor2->strides()); + EXPECT_EQ(std::vector{}, tensor2->dim_names()); + EXPECT_EQ(data->data(), tensor2->raw_data()); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr tensor3; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK_AND_ASSIGN(tensor3, Tensor::Make(float64(), data, shape, {}, dim_names)); + EXPECT_EQ(float64(), tensor3->type()); + EXPECT_EQ(shape, tensor3->shape()); + EXPECT_EQ(strides, tensor3->strides()); + EXPECT_EQ(dim_names, tensor3->dim_names()); + EXPECT_EQ(data->data(), tensor3->raw_data()); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr tensor4; - ASSERT_OK(Tensor::Make(float64(), data, {3, 6}, strides, dim_names).Value(&tensor4)); + ASSERT_OK_AND_ASSIGN(tensor4, Tensor::Make(float64(), data, shape, strides, dim_names)); + EXPECT_EQ(float64(), tensor4->type()); + EXPECT_EQ(shape, tensor4->shape()); + EXPECT_EQ(strides, tensor4->strides()); + EXPECT_EQ(dim_names, tensor4->dim_names()); + EXPECT_EQ(data->data(), tensor4->raw_data()); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); @@ -423,30 +444,52 @@ INSTANTIATE_TYPED_TEST_CASE_P(Float32, TestFloatTensor, FloatType); INSTANTIATE_TYPED_TEST_CASE_P(Float64, TestFloatTensor, DoubleType); TEST(TestNumericTensor, Make) { - // without strides and dim_names std::vector shape = {3, 6}; + std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; auto data = Buffer::Wrap(values); + + // without strides and dim_names std::shared_ptr> tensor1; - ASSERT_OK(NumericTensor::Make(data, shape).Value(&tensor1)); + ASSERT_OK_AND_ASSIGN(tensor1, NumericTensor::Make(data, shape)); + EXPECT_EQ(float64(), tensor1->type()); + EXPECT_EQ(shape, tensor1->shape()); + EXPECT_EQ(strides, tensor1->strides()); + EXPECT_EQ(data->data(), tensor1->raw_data()); + EXPECT_EQ(std::vector{}, tensor1->dim_names()); // without dim_names - std::vector strides = {sizeof(double) * 6, sizeof(double)}; std::shared_ptr> tensor2; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, strides).Value(&tensor2)); + ASSERT_OK_AND_ASSIGN(tensor2, NumericTensor::Make(data, shape, strides)); + EXPECT_EQ(float64(), tensor2->type()); + EXPECT_EQ(shape, tensor2->shape()); + EXPECT_EQ(strides, tensor2->strides()); + EXPECT_EQ(std::vector{}, tensor2->dim_names()); + EXPECT_EQ(data->data(), tensor2->raw_data()); EXPECT_TRUE(tensor2->Equals(*tensor1)); // without strides std::vector dim_names = {"foo", "bar"}; std::shared_ptr> tensor3; - ASSERT_OK(NumericTensor::Make(data, {3, 6}, {}, dim_names).Value(&tensor3)); + ASSERT_OK_AND_ASSIGN(tensor3, + NumericTensor::Make(data, shape, {}, dim_names)); + EXPECT_EQ(float64(), tensor3->type()); + EXPECT_EQ(shape, tensor3->shape()); + EXPECT_EQ(strides, tensor3->strides()); + EXPECT_EQ(dim_names, tensor3->dim_names()); + EXPECT_EQ(data->data(), tensor3->raw_data()); EXPECT_TRUE(tensor3->Equals(*tensor1)); EXPECT_TRUE(tensor3->Equals(*tensor2)); // supply all parameters std::shared_ptr> tensor4; - ASSERT_OK( - NumericTensor::Make(data, {3, 6}, strides, dim_names).Value(&tensor4)); + ASSERT_OK_AND_ASSIGN(tensor4, + NumericTensor::Make(data, shape, strides, dim_names)); + EXPECT_EQ(float64(), tensor4->type()); + EXPECT_EQ(shape, tensor4->shape()); + EXPECT_EQ(strides, tensor4->strides()); + EXPECT_EQ(dim_names, tensor4->dim_names()); + EXPECT_EQ(data->data(), tensor4->raw_data()); EXPECT_TRUE(tensor4->Equals(*tensor1)); EXPECT_TRUE(tensor4->Equals(*tensor2)); EXPECT_TRUE(tensor4->Equals(*tensor3)); From 40a723b89410d980e8d758fb3cb19a2d9090d68e Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Tue, 3 Dec 2019 12:43:59 +0900 Subject: [PATCH 19/19] Let Tensor::Make support 0-dim tensors --- cpp/src/arrow/tensor.cc | 3 --- cpp/src/arrow/tensor_test.cc | 17 ++++++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index c9cbfe258adf..8a924ba494a5 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -105,9 +105,6 @@ inline Status CheckTensorValidity(const std::shared_ptr& type, if (!data) { return Status::Invalid("Null data is supplied"); } - if (shape.size() == 0) { - return Status::Invalid("Empty shape is supplied"); - } if (!std::all_of(shape.begin(), shape.end(), [](int64_t x) { return x > 0; })) { return Status::Invalid("Shape elements must be positive"); } diff --git a/cpp/src/arrow/tensor_test.cc b/cpp/src/arrow/tensor_test.cc index 2d91765874e9..a0f2bf0f1c87 100644 --- a/cpp/src/arrow/tensor_test.cc +++ b/cpp/src/arrow/tensor_test.cc @@ -88,6 +88,20 @@ TEST(TestTensor, Make) { EXPECT_TRUE(tensor4->Equals(*tensor3)); } +TEST(TestTensor, MakeZeroDim) { + std::vector shape = {}; + std::vector values = {355 / 113.0}; + auto data = Buffer::Wrap(values); + std::shared_ptr tensor; + + ASSERT_OK_AND_ASSIGN(tensor, Tensor::Make(float64(), data, shape)); + EXPECT_EQ(1, tensor->size()); + EXPECT_EQ(shape, tensor->shape()); + EXPECT_EQ(shape, tensor->strides()); + EXPECT_EQ(data->data(), tensor->raw_data()); + EXPECT_EQ(values[0], tensor->Value({})); +} + TEST(TestTensor, MakeFailureCases) { std::vector shape = {3, 6}; std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -102,9 +116,6 @@ TEST(TestTensor, MakeFailureCases) { // null data ASSERT_RAISES(Invalid, Tensor::Make(float64(), nullptr, shape)); - // empty shape - ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {})); - // negative items in shape ASSERT_RAISES(Invalid, Tensor::Make(float64(), data, {-3, 6}));