Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions cpp/src/arrow/ipc/read_write_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1286,9 +1286,10 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) {
1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 2, 3};
const int sizeof_index_value = sizeof(c_index_value_type);
std::shared_ptr<SparseCOOIndex> si;
ASSERT_OK(SparseCOOIndex::Make(TypeTraits<IndexValueType>::type_singleton(), {12, 3},
{sizeof_index_value * 3, sizeof_index_value},
Buffer::Wrap(coords_values), &si));
ASSERT_OK_AND_ASSIGN(
si, SparseCOOIndex::Make(TypeTraits<IndexValueType>::type_singleton(), {12, 3},
{sizeof_index_value * 3, sizeof_index_value},
Buffer::Wrap(coords_values)));

std::vector<int64_t> shape = {2, 3, 4};
std::vector<std::string> dim_names = {"foo", "bar", "baz"};
Expand Down Expand Up @@ -1331,9 +1332,10 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) {
0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3};
const int sizeof_index_value = sizeof(c_index_value_type);
std::shared_ptr<SparseCOOIndex> si;
ASSERT_OK(SparseCOOIndex::Make(TypeTraits<IndexValueType>::type_singleton(), {12, 3},
{sizeof_index_value, sizeof_index_value * 12},
Buffer::Wrap(coords_values), &si));
ASSERT_OK_AND_ASSIGN(
si, SparseCOOIndex::Make(TypeTraits<IndexValueType>::type_singleton(), {12, 3},
{sizeof_index_value, sizeof_index_value * 12},
Buffer::Wrap(coords_values)));

std::vector<int64_t> shape = {2, 3, 4};
std::vector<std::string> dim_names = {"foo", "bar", "baz"};
Expand All @@ -1358,7 +1360,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCSRIndex) {
auto data = Buffer::Wrap(values);
NumericTensor<Int64Type> t(data, shape, {}, dim_names);
std::shared_ptr<SparseCSRMatrix> st;
ASSERT_OK(SparseCSRMatrix::Make(t, TypeTraits<IndexValueType>::type_singleton(), &st));
ASSERT_OK_AND_ASSIGN(
st, SparseCSRMatrix::Make(t, TypeTraits<IndexValueType>::type_singleton()));

this->CheckSparseTensorRoundTrip(*st);
}
Expand Down
12 changes: 7 additions & 5 deletions cpp/src/arrow/ipc/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1031,8 +1031,9 @@ Status ReadSparseTensorPayload(const IpcPayload& payload,
std::shared_ptr<DataType> indices_type;
RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(
sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(), &indices_type));
RETURN_NOT_OK(SparseCOOIndex::Make(indices_type, shape, non_zero_length,
payload.body_buffers[0], &sparse_index));
ARROW_ASSIGN_OR_RAISE(sparse_index,
SparseCOOIndex::Make(indices_type, shape, non_zero_length,
payload.body_buffers[0]));
return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, sparse_index,
non_zero_length, payload.body_buffers[1],
out);
Expand All @@ -1046,9 +1047,10 @@ Status ReadSparseTensorPayload(const IpcPayload& payload,
sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(), &indptr_type,
&indices_type));
ARROW_CHECK_EQ(indptr_type, indices_type);
RETURN_NOT_OK(SparseCSRIndex::Make(indices_type, shape, non_zero_length,
payload.body_buffers[0], payload.body_buffers[1],
&sparse_index));
ARROW_ASSIGN_OR_RAISE(
sparse_index,
SparseCSRIndex::Make(indices_type, shape, non_zero_length,
payload.body_buffers[0], payload.body_buffers[1]));
return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, sparse_index,
non_zero_length, payload.body_buffers[2],
out);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/python/numpy_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -414,12 +414,12 @@ Status NdarraysToSparseCSRMatrix(MemoryPool* pool, PyObject* data_ao, PyObject*

/// Convert a dense Tensor into a SparseCOOTensor.
///
/// \param[in] tensor the dense tensor to convert (dereferenced unconditionally)
/// \param[out] out receives the converted sparse tensor on success
/// \return the Status produced by SparseCOOTensor::Make
Status TensorToSparseCOOTensor(const std::shared_ptr<Tensor>& tensor,
                               std::shared_ptr<SparseCOOTensor>* out) {
  // SparseCOOTensor::Make returns a Result; Value() moves the payload into
  // `out` and hands back the Status. The former out-parameter call
  // `Make(*tensor, out)` is dropped: it preceded this return, so this line was
  // unreachable.
  return SparseCOOTensor::Make(*tensor).Value(out);
}

/// Convert a dense Tensor into a SparseCSRMatrix.
///
/// \param[in] tensor the dense tensor to convert (dereferenced unconditionally)
/// \param[out] out receives the converted sparse matrix on success
/// \return the Status produced by SparseCSRMatrix::Make
Status TensorToSparseCSRMatrix(const std::shared_ptr<Tensor>& tensor,
                               std::shared_ptr<SparseCSRMatrix>* out) {
  // SparseCSRMatrix::Make returns a Result; Value() moves the payload into
  // `out` and hands back the Status. The former out-parameter call
  // `Make(*tensor, out)` is dropped: it preceded this return, so this line was
  // unreachable.
  return SparseCSRMatrix::Make(*tensor).Value(out);
}

} // namespace py
Expand Down
122 changes: 86 additions & 36 deletions cpp/src/arrow/sparse_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "arrow/sparse_tensor.h"

#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
Expand All @@ -28,6 +29,17 @@

namespace arrow {

// ----------------------------------------------------------------------
// SparseIndex

/// Check that every element of `shape` is strictly greater than zero.
///
/// \param[in] shape the dimension sizes to validate
/// \return Status::Invalid if any element is zero or negative; Status::OK()
/// otherwise (an empty shape is accepted)
Status SparseIndex::ValidateShape(const std::vector<int64_t>& shape) const {
  const auto is_non_positive = [](int64_t dim) { return dim <= 0; };
  if (std::any_of(shape.begin(), shape.end(), is_non_positive)) {
    return Status::Invalid("Shape elements must be positive");
  }
  return Status::OK();
}

namespace {

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -503,72 +515,110 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
// ----------------------------------------------------------------------
// SparseCOOIndex

Status SparseCOOIndex::Make(std::shared_ptr<DataType> indices_type,
const std::vector<int64_t>& indices_shape,
const std::vector<int64_t>& indices_strides,
std::shared_ptr<Buffer> indices_data,
std::shared_ptr<SparseCOOIndex>* out) {
*out = std::make_shared<SparseCOOIndex>(std::make_shared<Tensor>(
indices_type, indices_data, indices_shape, indices_strides));
namespace {

/// Validate the description of a SparseCOOIndex coordinates tensor.
///
/// The checks run in this order, and the first failure is returned:
///   1. the value type must be an integer type,
///   2. the shape must have exactly two dimensions,
///   3. the strides must be contiguous for the given type and shape.
///
/// \return Status::Invalid describing the first failed check, else Status::OK()
inline Status CheckSparseCOOIndexValidity(const std::shared_ptr<DataType>& type,
                                          const std::vector<int64_t>& shape,
                                          const std::vector<int64_t>& strides) {
  const bool has_integer_type = is_integer(type->id());
  if (!has_integer_type) {
    return Status::Invalid("Type of SparseCOOIndex indices must be integer");
  }

  constexpr size_t kMatrixRank = 2;
  if (shape.size() != kMatrixRank) {
    return Status::Invalid("SparseCOOIndex indices must be a matrix");
  }

  const bool contiguous = internal::IsTensorStridesContiguous(type, shape, strides);
  if (!contiguous) {
    return Status::Invalid("SparseCOOIndex indices must be contiguous");
  }

  return Status::OK();
}

Status SparseCOOIndex::Make(std::shared_ptr<DataType> indices_type,
const std::vector<int64_t>& shape, int64_t non_zero_length,
std::shared_ptr<Buffer> indices_data,
std::shared_ptr<SparseCOOIndex>* out) {
} // namespace

/// Create a SparseCOOIndex from an explicit description of its coordinates
/// tensor.
///
/// The type, shape and strides are validated with CheckSparseCOOIndexValidity
/// before any object is constructed, so invalid input yields an error Status
/// instead of reaching the checks in the SparseCOOIndex constructor.
///
/// \param[in] indices_type value type of the coordinates
/// \param[in] indices_shape shape of the coordinates tensor
/// \param[in] indices_strides strides of the coordinates tensor
/// \param[in] indices_data buffer holding the coordinates
/// \return the new index, or the validation error
Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
    const std::shared_ptr<DataType>& indices_type,
    const std::vector<int64_t>& indices_shape,
    const std::vector<int64_t>& indices_strides, std::shared_ptr<Buffer> indices_data) {
  RETURN_NOT_OK(
      CheckSparseCOOIndexValidity(indices_type, indices_shape, indices_strides));

  const auto coords = std::make_shared<Tensor>(indices_type, indices_data,
                                               indices_shape, indices_strides);
  return std::make_shared<SparseCOOIndex>(coords);
}

/// Create a SparseCOOIndex for a sparse tensor of the given shape.
///
/// The coordinates tensor is described as a (non_zero_length x ndim) matrix,
/// where ndim is the number of entries in `shape`. Its strides are
/// {elsize, elsize * non_zero_length}, i.e. stepping along the first axis moves
/// by one index value.
///
/// \param[in] indices_type value type of the coordinates; must be an integer type
/// \param[in] shape shape of the sparse tensor being indexed
/// \param[in] non_zero_length number of non-zero entries
/// \param[in] indices_data buffer holding the coordinates
/// \return the new index, or Status::Invalid if validation fails
Result<std::shared_ptr<SparseCOOIndex>> SparseCOOIndex::Make(
    const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
    int64_t non_zero_length, std::shared_ptr<Buffer> indices_data) {
  // Reject non-integer types before asking for a byte width: the cast below is
  // only valid for fixed-width types. The message matches the one produced by
  // CheckSparseCOOIndexValidity, so callers see the same error as before.
  if (!is_integer(indices_type->id())) {
    return Status::Invalid("Type of SparseCOOIndex indices must be integer");
  }

  const auto ndim = static_cast<int64_t>(shape.size());

  // Width in bytes of a single index value. The previous expression,
  // sizeof(indices_type.get()), measured a raw pointer rather than the index
  // type, which produced wrong strides for any index type whose width differs
  // from the pointer size.
  const int64_t elsize =
      static_cast<const FixedWidthType&>(*indices_type).bit_width() / 8;

  const std::vector<int64_t> indices_shape({non_zero_length, ndim});
  const std::vector<int64_t> indices_strides({elsize, elsize * non_zero_length});
  return Make(indices_type, indices_shape, indices_strides, indices_data);
}

// Constructor with a contiguous NumericTensor
//
// The number of non-zero entries passed to SparseIndexBase is the first
// dimension of `coords`. The coordinates tensor must have an integer type, be
// two-dimensional and contiguous; a violation aborts via ARROW_CHECK_OK.
// Use SparseCOOIndex::Make to get an error Status instead.
SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<Tensor>& coords)
    : SparseIndexBase(coords->shape()[0]), coords_(coords) {
  // A single consolidated check: CheckSparseCOOIndexValidity already covers the
  // integer-type, rank-2 and contiguity conditions, so the separate
  // ARROW_CHECKs for those conditions were redundant.
  ARROW_CHECK_OK(
      CheckSparseCOOIndexValidity(coords_->type(), coords_->shape(), coords_->strides()));
}

/// Return the fixed name of this index kind.
std::string SparseCOOIndex::ToString() const {
  return "SparseCOOIndex";
}

// ----------------------------------------------------------------------
// SparseCSRIndex

Status SparseCSRIndex::Make(const std::shared_ptr<DataType> indices_type,
const std::vector<int64_t>& indptr_shape,
const std::vector<int64_t>& indices_shape,
std::shared_ptr<Buffer> indptr_data,
std::shared_ptr<Buffer> indices_data,
std::shared_ptr<SparseCSRIndex>* out) {
*out = std::make_shared<SparseCSRIndex>(
std::make_shared<Tensor>(indices_type, indptr_data, indptr_shape),
std::make_shared<Tensor>(indices_type, indices_data, indices_shape));
namespace {

/// Validate the description of the two tensors that make up a SparseCSRIndex.
///
/// The checks run in this order, and the first failure is returned:
///   1. indptr value type must be an integer type,
///   2. indptr shape must be one-dimensional,
///   3. indices value type must be an integer type,
///   4. indices shape must be one-dimensional.
///
/// \return Status::Invalid describing the first failed check, else Status::OK()
inline Status CheckSparseCSRIndexValidity(const std::shared_ptr<DataType>& indptr_type,
                                          const std::shared_ptr<DataType>& indices_type,
                                          const std::vector<int64_t>& indptr_shape,
                                          const std::vector<int64_t>& indices_shape) {
  constexpr size_t kVectorRank = 1;

  // indptr: type first, then rank.
  const bool indptr_is_integer = is_integer(indptr_type->id());
  if (!indptr_is_integer) {
    return Status::Invalid("Type of SparseCSRIndex indptr must be integer");
  }
  if (indptr_shape.size() != kVectorRank) {
    return Status::Invalid("SparseCSRIndex indptr must be a vector");
  }

  // indices: type first, then rank.
  const bool indices_is_integer = is_integer(indices_type->id());
  if (!indices_is_integer) {
    return Status::Invalid("Type of SparseCSRIndex indices must be integer");
  }
  if (indices_shape.size() != kVectorRank) {
    return Status::Invalid("SparseCSRIndex indices must be a vector");
  }

  return Status::OK();
}

Status SparseCSRIndex::Make(const std::shared_ptr<DataType> indices_type,
const std::vector<int64_t>& shape, int64_t non_zero_length,
std::shared_ptr<Buffer> indptr_data,
std::shared_ptr<Buffer> indices_data,
std::shared_ptr<SparseCSRIndex>* out) {
} // namespace

/// Create a SparseCSRIndex from explicit descriptions of its indptr and indices
/// tensors.
///
/// Types and shapes are validated with CheckSparseCSRIndexValidity before any
/// object is constructed, so invalid input yields an error Status instead of
/// reaching the checks in the SparseCSRIndex constructor.
///
/// \param[in] indptr_type value type of the indptr tensor
/// \param[in] indices_type value type of the indices tensor
/// \param[in] indptr_shape shape of the indptr tensor
/// \param[in] indices_shape shape of the indices tensor
/// \param[in] indptr_data buffer holding the indptr values
/// \param[in] indices_data buffer holding the indices values
/// \return the new index, or the validation error
Result<std::shared_ptr<SparseCSRIndex>> SparseCSRIndex::Make(
    const std::shared_ptr<DataType>& indptr_type,
    const std::shared_ptr<DataType>& indices_type,
    const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
    std::shared_ptr<Buffer> indptr_data, std::shared_ptr<Buffer> indices_data) {
  RETURN_NOT_OK(CheckSparseCSRIndexValidity(indptr_type, indices_type, indptr_shape,
                                            indices_shape));

  const auto indptr = std::make_shared<Tensor>(indptr_type, indptr_data, indptr_shape);
  const auto indices =
      std::make_shared<Tensor>(indices_type, indices_data, indices_shape);
  return std::make_shared<SparseCSRIndex>(indptr, indices);
}

/// Create a SparseCSRIndex for a sparse matrix of the given shape.
///
/// The indptr tensor is described as a vector of length shape[0] + 1 and the
/// indices tensor as a vector of length non_zero_length.
///
/// \param[in] indptr_type value type of the indptr tensor
/// \param[in] indices_type value type of the indices tensor
/// \param[in] shape shape of the sparse matrix being indexed; must not be empty
/// \param[in] non_zero_length number of non-zero entries
/// \param[in] indptr_data buffer holding the indptr values
/// \param[in] indices_data buffer holding the indices values
/// \return the new index, or Status::Invalid if validation fails
Result<std::shared_ptr<SparseCSRIndex>> SparseCSRIndex::Make(
    const std::shared_ptr<DataType>& indptr_type,
    const std::shared_ptr<DataType>& indices_type, const std::vector<int64_t>& shape,
    int64_t non_zero_length, std::shared_ptr<Buffer> indptr_data,
    std::shared_ptr<Buffer> indices_data) {
  // shape[0] is read below; indexing an empty vector is undefined behaviour, so
  // report it as an error instead.
  if (shape.empty()) {
    return Status::Invalid("SparseCSRIndex requires a non-empty shape");
  }

  const std::vector<int64_t> indptr_shape({shape[0] + 1});
  const std::vector<int64_t> indices_shape({non_zero_length});
  return Make(indptr_type, indices_type, indptr_shape, indices_shape, indptr_data,
              indices_data);
}

// Constructor with two index vectors
//
// The number of non-zero entries passed to SparseIndexBase is the first
// dimension of `indices`. Both tensors must have an integer type and be
// one-dimensional; a violation aborts via ARROW_CHECK_OK. Use
// SparseCSRIndex::Make to get an error Status instead.
SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<Tensor>& indptr,
                               const std::shared_ptr<Tensor>& indices)
    : SparseIndexBase(indices->shape()[0]), indptr_(indptr), indices_(indices) {
  // A single consolidated check: CheckSparseCSRIndexValidity already covers the
  // integer-type and rank-1 conditions for both tensors. ARROW_CHECK_OK is used
  // (rather than ARROW_CHECK(...ok())) to match the SparseCOOIndex constructor.
  ARROW_CHECK_OK(CheckSparseCSRIndexValidity(indptr_->type(), indices_->type(),
                                             indptr_->shape(), indices_->shape()));
}

/// Return the fixed name of this index kind.
std::string SparseCSRIndex::ToString() const {
  return "SparseCSRIndex";
}
Expand Down
Loading