Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CSR SpGEAM #556

Merged
merged 4 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 30 additions & 21 deletions common/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -496,21 +496,23 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_classical_spmv(

template <int subwarp_size, typename IndexType>
__global__ __launch_bounds__(default_block_size) void spgeam_nnz(
const IndexType *a_row_ptrs, const IndexType *a_col_idxs,
const IndexType *b_row_ptrs, const IndexType *b_col_idxs,
IndexType num_rows, IndexType *nnz)
const IndexType *__restrict__ a_row_ptrs,
const IndexType *__restrict__ a_col_idxs,
const IndexType *__restrict__ b_row_ptrs,
const IndexType *__restrict__ b_col_idxs, IndexType num_rows,
IndexType *__restrict__ nnz)
{
auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
if (row >= num_rows) {
return;
}

auto a_begin = a_row_ptrs[row];
auto b_begin = b_row_ptrs[row];
auto a_size = a_row_ptrs[row + 1] - a_begin;
auto b_size = b_row_ptrs[row + 1] - b_begin;
const auto a_begin = a_row_ptrs[row];
const auto b_begin = b_row_ptrs[row];
const auto a_size = a_row_ptrs[row + 1] - a_begin;
const auto b_size = b_row_ptrs[row + 1] - b_begin;
IndexType count{};
group_merge<subwarp_size>(
a_col_idxs + a_begin, a_size, b_col_idxs + b_begin, b_size, subwarp,
Expand All @@ -528,28 +530,35 @@ __global__ __launch_bounds__(default_block_size) void spgeam_nnz(

template <int subwarp_size, typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void spgeam(
ValueType alpha, const IndexType *a_row_ptrs, const IndexType *a_col_idxs,
const ValueType *a_vals, ValueType beta, const IndexType *b_row_ptrs,
const IndexType *b_col_idxs, const ValueType *b_vals, IndexType num_rows,
const IndexType *c_row_ptrs, IndexType *c_col_idxs, ValueType *c_vals)
{
auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
const ValueType *__restrict__ palpha,
const IndexType *__restrict__ a_row_ptrs,
const IndexType *__restrict__ a_col_idxs,
const ValueType *__restrict__ a_vals, const ValueType *__restrict__ pbeta,
const IndexType *__restrict__ b_row_ptrs,
const IndexType *__restrict__ b_col_idxs,
const ValueType *__restrict__ b_vals, IndexType num_rows,
const IndexType *__restrict__ c_row_ptrs,
IndexType *__restrict__ c_col_idxs, ValueType *__restrict__ c_vals)
{
const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
if (row >= num_rows) {
return;
}

auto lane = static_cast<IndexType>(subwarp.thread_rank());
const auto alpha = palpha[0];
const auto beta = pbeta[0];
const auto lane = static_cast<IndexType>(subwarp.thread_rank());
constexpr auto lanemask_full =
~config::lane_mask_type{} >> (config::warp_size - subwarp_size);
auto lanemask_eq = config::lane_mask_type{1} << lane;
auto lanemask_lt = lanemask_eq - 1;
const auto lanemask_eq = config::lane_mask_type{1} << lane;
const auto lanemask_lt = lanemask_eq - 1;

auto a_begin = a_row_ptrs[row];
auto b_begin = b_row_ptrs[row];
auto a_size = a_row_ptrs[row + 1] - a_begin;
auto b_size = b_row_ptrs[row + 1] - b_begin;
const auto a_begin = a_row_ptrs[row];
const auto b_begin = b_row_ptrs[row];
const auto a_size = a_row_ptrs[row + 1] - a_begin;
const auto b_size = b_row_ptrs[row + 1] - b_begin;
auto c_begin = c_row_ptrs[row];
bool skip_first{};
group_merge<subwarp_size>(
Expand Down
5 changes: 5 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,11 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);

template <typename ValueType, typename IndexType>
GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
Expand Down
8 changes: 8 additions & 0 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/matrix/ell.hpp>
#include <ginkgo/core/matrix/identity.hpp>
#include <ginkgo/core/matrix/sellp.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

Expand All @@ -57,6 +58,7 @@ GKO_REGISTER_OPERATION(spmv, csr::spmv);
GKO_REGISTER_OPERATION(advanced_spmv, csr::advanced_spmv);
GKO_REGISTER_OPERATION(spgemm, csr::spgemm);
GKO_REGISTER_OPERATION(advanced_spgemm, csr::advanced_spgemm);
GKO_REGISTER_OPERATION(spgeam, csr::spgeam);
GKO_REGISTER_OPERATION(convert_to_coo, csr::convert_to_coo);
GKO_REGISTER_OPERATION(convert_to_dense, csr::convert_to_dense);
GKO_REGISTER_OPERATION(convert_to_sellp, csr::convert_to_sellp);
Expand Down Expand Up @@ -111,6 +113,12 @@ void Csr<ValueType, IndexType>::apply_impl(const LinOp *alpha, const LinOp *b,
this->get_executor()->run(
csr::make_advanced_spgemm(as<Dense>(alpha), this, b_csr,
as<Dense>(beta), x_copy.get(), x_csr));
} else if (dynamic_cast<const Identity<ValueType> *>(b)) {
// if b is an identity matrix, we compute an SpGEAM
auto x_csr = as<TCsr>(x);
auto x_copy = x_csr->clone();
this->get_executor()->run(csr::make_spgeam(
as<Dense>(alpha), this, as<Dense>(beta), lend(x_copy), x_csr));
} else {
// otherwise we assume that b is dense and compute a SpMV/SpMM
this->get_executor()->run(
Expand Down
10 changes: 10 additions & 0 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType> *d, \
matrix::Csr<ValueType, IndexType> *c)

#define GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType) \
void spgeam(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Dense<ValueType> *alpha, \
const matrix::Csr<ValueType, IndexType> *a, \
const matrix::Dense<ValueType> *beta, \
const matrix::Csr<ValueType, IndexType> *b, \
matrix::Csr<ValueType, IndexType> *c)

#define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \
void convert_to_dense(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType> *source, \
Expand Down Expand Up @@ -176,6 +184,8 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType); \
Expand Down
70 changes: 70 additions & 0 deletions cuda/matrix/csr_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ using compiled_kernels = syn::value_list<int, 3, 4, 6, 7, 8, 12, 14>;
using classical_kernels =
syn::value_list<int, config::warp_size, 32, 16, 8, 4, 2, 1>;

using spgeam_kernels =
syn::value_list<int, 1, 2, 4, 8, 16, 32, config::warp_size>;


#include "common/matrix/csr_kernels.hpp.inc"

Expand Down Expand Up @@ -580,6 +583,73 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);


namespace {


template <int subwarp_size, typename ValueType, typename IndexType>
void spgeam(syn::value_list<int, subwarp_size>,
std::shared_ptr<const DefaultExecutor> exec, const ValueType *alpha,
const IndexType *a_row_ptrs, const IndexType *a_col_idxs,
const ValueType *a_vals, const ValueType *beta,
const IndexType *b_row_ptrs, const IndexType *b_col_idxs,
const ValueType *b_vals, matrix::Csr<ValueType, IndexType> *c)
{
auto m = static_cast<IndexType>(c->get_size()[0]);
auto c_row_ptrs = c->get_row_ptrs();
// count nnz for alpha * A + beta * B
auto subwarps_per_block = default_block_size / subwarp_size;
auto num_blocks = ceildiv(m, subwarps_per_block);
kernel::spgeam_nnz<subwarp_size><<<num_blocks, default_block_size>>>(
a_row_ptrs, a_col_idxs, b_row_ptrs, b_col_idxs, m, c_row_ptrs);

// build row pointers
components::prefix_sum(exec, c_row_ptrs, m + 1);

// accumulate non-zeros for alpha * A + beta * B
matrix::CsrBuilder<ValueType, IndexType> c_builder{c};
auto c_nnz = exec->copy_val_to_host(c_row_ptrs + m);
c_builder.get_col_idx_array().resize_and_reset(c_nnz);
c_builder.get_value_array().resize_and_reset(c_nnz);
auto c_col_idxs = c->get_col_idxs();
auto c_vals = c->get_values();
kernel::spgeam<subwarp_size><<<num_blocks, default_block_size>>>(
as_cuda_type(alpha), a_row_ptrs, a_col_idxs, as_cuda_type(a_vals),
as_cuda_type(beta), b_row_ptrs, b_col_idxs, as_cuda_type(b_vals), m,
c_row_ptrs, c_col_idxs, as_cuda_type(c_vals));
}

upsj marked this conversation as resolved.
Show resolved Hide resolved
GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam);


} // namespace


template <typename ValueType, typename IndexType>
void spgeam(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType> *alpha,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c)
{
auto total_nnz =
a->get_num_stored_elements() + b->get_num_stored_elements();
auto nnz_per_row = total_nnz / a->get_size()[0];
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);


template <typename IndexType>
void convert_row_ptrs_to_idxs(std::shared_ptr<const CudaExecutor> exec,
const IndexType *ptrs, size_type num_rows,
Expand Down
23 changes: 23 additions & 0 deletions cuda/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/matrix/ell.hpp>
#include <ginkgo/core/matrix/hybrid.hpp>
#include <ginkgo/core/matrix/identity.hpp>
#include <ginkgo/core/matrix/sellp.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

Expand Down Expand Up @@ -384,6 +385,28 @@ TEST_F(Csr, SimpleApplyToCsrMatrixIsEquivalentToRef)
}


TEST_F(Csr, AdvancedApplyToIdentityMatrixIsEquivalentToRef)
{
set_up_apply_data(std::make_shared<Mtx::automatical>());
auto a = gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 0);
auto b = gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 0);
auto da = Mtx::create(cuda);
auto db = Mtx::create(cuda);
da->copy_from(a.get());
db->copy_from(b.get());
auto id = gko::matrix::Identity<Mtx::value_type>::create(ref, mtx_size[1]);
auto did =
gko::matrix::Identity<Mtx::value_type>::create(cuda, mtx_size[1]);

a->apply(alpha.get(), id.get(), beta.get(), b.get());
da->apply(dalpha.get(), did.get(), dbeta.get(), db.get());

GKO_ASSERT_MTX_NEAR(b, db, 1e-14);
GKO_ASSERT_MTX_EQ_SPARSITY(b, db);
ASSERT_TRUE(db->is_sorted_by_column_index());
}


TEST_F(Csr, TransposeIsEquivalentToRef)
{
set_up_apply_data(std::make_shared<Mtx::automatical>(cuda));
Expand Down
40 changes: 32 additions & 8 deletions hip/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,9 +536,9 @@ namespace {

template <int subwarp_size, typename ValueType, typename IndexType>
void spgeam(syn::value_list<int, subwarp_size>,
std::shared_ptr<const HipExecutor> exec, ValueType alpha,
std::shared_ptr<const HipExecutor> exec, const ValueType *alpha,
const IndexType *a_row_ptrs, const IndexType *a_col_idxs,
const ValueType *a_vals, ValueType beta,
const ValueType *a_vals, const ValueType *beta,
const IndexType *b_row_ptrs, const IndexType *b_col_idxs,
const ValueType *b_vals, matrix::Csr<ValueType, IndexType> *c)
{
Expand Down Expand Up @@ -570,7 +570,6 @@ void spgeam(syn::value_list<int, subwarp_size>,
c_col_idxs, as_hip_type(c_vals));
}


GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam);


Expand Down Expand Up @@ -650,18 +649,17 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,
hipsparse::destroy(b_descr);
hipsparse::destroy(a_descr);

auto valpha = exec->copy_val_to_host(alpha->get_const_values());
auto vbeta = exec->copy_val_to_host(beta->get_const_values());
auto total_nnz = c_nnz + d->get_num_stored_elements();
auto nnz_per_row = total_nnz / m;
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec, valpha,
c_tmp_row_ptrs, c_tmp_col_idxs, c_tmp_vals, vbeta,
d_row_ptrs, d_col_idxs, d_vals, c);
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs,
c_tmp_vals, beta->get_const_values(), d_row_ptrs,
d_col_idxs, d_vals, c);
} else {
GKO_NOT_IMPLEMENTED;
}
Expand All @@ -671,6 +669,32 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);


template <typename ValueType, typename IndexType>
void spgeam(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType> *alpha,
const matrix::Csr<ValueType, IndexType> *a,
const matrix::Dense<ValueType> *beta,
const matrix::Csr<ValueType, IndexType> *b,
matrix::Csr<ValueType, IndexType> *c)
{
auto total_nnz =
a->get_num_stored_elements() + b->get_num_stored_elements();
auto nnz_per_row = total_nnz / a->get_size()[0];
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);


template <typename IndexType>
void convert_row_ptrs_to_idxs(std::shared_ptr<const HipExecutor> exec,
const IndexType *ptrs, size_type num_rows,
Expand Down
22 changes: 22 additions & 0 deletions hip/test/matrix/csr_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/matrix/ell.hpp>
#include <ginkgo/core/matrix/hybrid.hpp>
#include <ginkgo/core/matrix/identity.hpp>
#include <ginkgo/core/matrix/sellp.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

Expand Down Expand Up @@ -370,6 +371,27 @@ TEST_F(Csr, SimpleApplyToCsrMatrixIsEquivalentToRef)
}


TEST_F(Csr, AdvancedApplyToIdentityMatrixIsEquivalentToRef)
{
set_up_apply_data(std::make_shared<Mtx::automatical>(hip));
auto a = gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 0);
auto b = gen_mtx<Mtx>(mtx_size[0], mtx_size[1], 0);
auto da = Mtx::create(hip);
auto db = Mtx::create(hip);
da->copy_from(a.get());
db->copy_from(b.get());
auto id = gko::matrix::Identity<Mtx::value_type>::create(ref, mtx_size[1]);
auto did = gko::matrix::Identity<Mtx::value_type>::create(hip, mtx_size[1]);

a->apply(alpha.get(), id.get(), beta.get(), b.get());
da->apply(dalpha.get(), did.get(), dbeta.get(), db.get());

GKO_ASSERT_MTX_NEAR(b, db, 1e-14);
GKO_ASSERT_MTX_EQ_SPARSITY(b, db);
ASSERT_TRUE(db->is_sorted_by_column_index());
}


TEST_F(Csr, TransposeIsEquivalentToRef)
{
set_up_apply_data(std::make_shared<Mtx::automatical>(hip));
Expand Down
Loading