diff --git a/common/matrix/csr_kernels.hpp.inc b/common/matrix/csr_kernels.hpp.inc index 8f7e1a3f5a2..edd6014154b 100644 --- a/common/matrix/csr_kernels.hpp.inc +++ b/common/matrix/csr_kernels.hpp.inc @@ -1062,4 +1062,30 @@ __global__ __launch_bounds__(default_block_size) void inv_row_permute_kernel( out_cols[out_begin + i] = in_cols[in_begin + i]; out_vals[out_begin + i] = in_vals[in_begin + i]; } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( + size_type num_rows, const IndexType *__restrict__ permutation, + const IndexType *__restrict__ in_row_ptrs, + const IndexType *__restrict__ in_cols, + const ValueType *__restrict__ in_vals, + const IndexType *__restrict__ out_row_ptrs, + IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) +{ + auto tid = thread::get_subwarp_id_flat(); + if (tid >= num_rows) { + return; + } + auto lane = threadIdx.x % subwarp_size; + auto in_row = tid; + auto out_row = permutation[tid]; + auto in_begin = in_row_ptrs[in_row]; + auto in_size = in_row_ptrs[in_row + 1] - in_begin; + auto out_begin = out_row_ptrs[out_row]; + for (IndexType i = lane; i < in_size; i += subwarp_size) { + out_cols[out_begin + i] = permutation[in_cols[in_begin + i]]; + out_vals[out_begin + i] = in_vals[in_begin + i]; + } } \ No newline at end of file diff --git a/common/matrix/dense_kernels.hpp.inc b/common/matrix/dense_kernels.hpp.inc index 747356e4594..9df9caf0353 100644 --- a/common/matrix/dense_kernels.hpp.inc +++ b/common/matrix/dense_kernels.hpp.inc @@ -421,16 +421,48 @@ __global__ __launch_bounds__(default_block_size) void reduce_total_cols( } -template -__global__ __launch_bounds__(block_size) void row_gather( +template +__global__ __launch_bounds__(default_block_size) void symm_permute( size_type num_rows, size_type num_cols, const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, size_type stride_orig, ValueType *__restrict__ result, size_type stride_result) { - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); + const auto global_id = thread::get_thread_id_flat(); + const auto row_id = global_id / num_cols; + const auto col_id = global_id % num_cols; + if (row_id < num_rows) { + result[row_id * stride_result + col_id] = + orig[perm_idxs[row_id] * stride_orig + perm_idxs[col_id]]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void inv_symm_permute( + size_type num_rows, size_type num_cols, + const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, + size_type stride_orig, ValueType *__restrict__ result, + size_type stride_result) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_id = global_id / num_cols; + const auto col_id = global_id % num_cols; + if (row_id < num_rows) { + result[perm_idxs[row_id] * stride_result + perm_idxs[col_id]] = + orig[row_id * stride_orig + col_id]; + } +} + + +template +__global__ __launch_bounds__(default_block_size) void row_gather( + size_type num_rows, size_type num_cols, + const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, + size_type stride_orig, ValueType *__restrict__ result, + size_type stride_result) +{ + const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / num_cols; const auto col_id = global_id % num_cols; if (row_id < num_rows) { @@ -440,16 +472,14 @@ __global__ __launch_bounds__(block_size) void row_gather( } -template -__global__ __launch_bounds__(block_size) void column_permute( +template +__global__ __launch_bounds__(default_block_size) void column_permute( size_type num_rows, size_type num_cols, const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, size_type stride_orig, ValueType *__restrict__ result, size_type stride_result) { - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); + const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / num_cols; const auto col_id = global_id % num_cols; if (row_id < num_rows) { @@ -459,16 +489,14 @@ __global__ __launch_bounds__(block_size) void column_permute( } -template -__global__ __launch_bounds__(block_size) void inverse_row_permute( +template +__global__ __launch_bounds__(default_block_size) void inverse_row_permute( size_type num_rows, size_type num_cols, const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, size_type stride_orig, ValueType *__restrict__ result, size_type stride_result) { - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); + const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / num_cols; const auto col_id = global_id % num_cols; if (row_id < num_rows) { @@ -478,16 +506,14 @@ __global__ __launch_bounds__(block_size) void inverse_row_permute( } -template -__global__ __launch_bounds__(block_size) void inverse_column_permute( +template +__global__ __launch_bounds__(default_block_size) void inverse_column_permute( size_type num_rows, size_type num_cols, const IndexType *__restrict__ perm_idxs, const ValueType *__restrict__ orig, size_type stride_orig, ValueType *__restrict__ result, size_type stride_result) { - constexpr auto warps_per_block = block_size / config::warp_size; - const auto global_id = - thread::get_thread_id(); + const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / num_cols; const auto col_id = global_id % num_cols; if (row_id < num_rows) { diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index da34e4bd11a..60ca0316f5e 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -218,6 +218,18 @@ GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + +template +GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + template GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); @@ -688,6 +700,12 @@ GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +template +GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + template GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index a9a40613e78..fc87cdaf561 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -69,6 +69,7 @@ GKO_REGISTER_OPERATION(convert_to_ell, csr::convert_to_ell); GKO_REGISTER_OPERATION(convert_to_hybrid, csr::convert_to_hybrid); GKO_REGISTER_OPERATION(transpose, csr::transpose); GKO_REGISTER_OPERATION(conj_transpose, csr::conj_transpose); +GKO_REGISTER_OPERATION(inv_symm_permute, csr::inv_symm_permute); GKO_REGISTER_OPERATION(row_permute, csr::row_permute); GKO_REGISTER_OPERATION(inverse_row_permute, csr::inverse_row_permute); GKO_REGISTER_OPERATION(inverse_column_permute, csr::inverse_column_permute); @@ -402,6 +403,48 @@ std::unique_ptr Csr::conj_transpose() const } +template +std::unique_ptr Csr::permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = + Csr::create(exec, this->get_size(), this->get_num_stored_elements(), + this->get_strategy()); + Array inv_permutation(exec, this->get_size()[1]); + + exec->run(csr::make_invert_permutation( + this->get_size()[1], + make_temporary_clone(exec, permutation_indices)->get_const_data(), + inv_permutation.get_data())); + exec->run(csr::make_inv_symm_permute(inv_permutation.get_const_data(), this, + permute_cpy.get())); + permute_cpy->make_srow(); + return std::move(permute_cpy); +} + + +template +std::unique_ptr Csr::inverse_permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = + Csr::create(exec, this->get_size(), this->get_num_stored_elements(), + this->get_strategy()); + + exec->run(csr::make_inv_symm_permute( + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + permute_cpy.get())); + permute_cpy->make_srow(); + return std::move(permute_cpy); +} + + template std::unique_ptr Csr::row_permute( const Array *permutation_indices) const @@ -445,19 +488,17 @@ std::unique_ptr Csr::column_permute( template std::unique_ptr Csr::inverse_row_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); auto inverse_permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); exec->run(csr::make_inverse_row_permute( - make_temporary_clone(exec, inverse_permutation_indices) - ->get_const_data(), - this, inverse_permute_cpy.get())); + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); return std::move(inverse_permute_cpy); } @@ -465,19 +506,17 @@ std::unique_ptr Csr::inverse_row_permute( template std::unique_ptr Csr::inverse_column_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); auto inverse_permute_cpy = Csr::create(exec, this->get_size(), this->get_num_stored_elements(), this->get_strategy()); exec->run(csr::make_inverse_column_permute( - make_temporary_clone(exec, inverse_permutation_indices) - ->get_const_data(), - this, inverse_permute_cpy.get())); + make_temporary_clone(exec, permutation_indices)->get_const_data(), this, + inverse_permute_cpy.get())); inverse_permute_cpy->make_srow(); inverse_permute_cpy->sort_by_column_index(); return std::move(inverse_permute_cpy); diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 6f7b72312d9..3679c353fb0 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -129,6 +129,12 @@ namespace kernels { const matrix::Csr *orig, \ matrix::Csr *trans) +#define GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) \ + void inv_symm_permute(std::shared_ptr exec, \ + const IndexType *permutation_indices, \ + const matrix::Csr *orig, \ + matrix::Csr *permuted) + #define GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void row_permute(std::shared_ptr exec, \ const IndexType *permutation_indices, \ @@ -207,6 +213,8 @@ namespace kernels { template \ GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType); \ template \ + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType); \ diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index a9e6a3a1515..0d70016ab6d 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -75,6 +75,8 @@ GKO_REGISTER_OPERATION(calculate_nonzeros_per_row, GKO_REGISTER_OPERATION(calculate_total_cols, dense::calculate_total_cols); GKO_REGISTER_OPERATION(transpose, dense::transpose); GKO_REGISTER_OPERATION(conj_transpose, dense::conj_transpose); +GKO_REGISTER_OPERATION(symm_permute, dense::symm_permute); +GKO_REGISTER_OPERATION(inv_symm_permute, dense::inv_symm_permute); GKO_REGISTER_OPERATION(row_gather, dense::row_gather); GKO_REGISTER_OPERATION(column_permute, dense::column_permute); GKO_REGISTER_OPERATION(inverse_row_permute, dense::inverse_row_permute); @@ -638,6 +640,74 @@ std::unique_ptr Dense::conj_transpose() const } +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = Dense::create(exec, this->get_size()); + + exec->run(dense::make_symm_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + permute_cpy.get())); + + return std::move(permute_cpy); +} + + +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = Dense::create(exec, this->get_size()); + + exec->run(dense::make_symm_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + permute_cpy.get())); + + return std::move(permute_cpy); +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = Dense::create(exec, this->get_size()); + + exec->run(dense::make_inv_symm_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + permute_cpy.get())); + + return std::move(permute_cpy); +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + auto exec = this->get_executor(); + auto permute_cpy = Dense::create(exec, this->get_size()); + + exec->run(dense::make_inv_symm_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + permute_cpy.get())); + + return std::move(permute_cpy); +} + + template std::unique_ptr Dense::row_permute( const Array *permutation_indices) const @@ -763,15 +833,14 @@ std::unique_ptr Dense::column_permute( template std::unique_ptr Dense::inverse_row_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); auto inverse_permute_cpy = Dense::create(exec, this->get_size()); exec->run(dense::make_inverse_row_permute( - make_temporary_clone(exec, inverse_permutation_indices).get(), this, + make_temporary_clone(exec, permutation_indices).get(), this, inverse_permute_cpy.get())); return std::move(inverse_permute_cpy); @@ -780,15 +849,14 @@ std::unique_ptr Dense::inverse_row_permute( template std::unique_ptr Dense::inverse_row_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[0]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); auto inverse_permute_cpy = Dense::create(exec, this->get_size()); exec->run(dense::make_inverse_row_permute( - make_temporary_clone(exec, inverse_permutation_indices).get(), this, + make_temporary_clone(exec, permutation_indices).get(), this, inverse_permute_cpy.get())); return std::move(inverse_permute_cpy); @@ -797,15 +865,14 @@ std::unique_ptr Dense::inverse_row_permute( template std::unique_ptr Dense::inverse_column_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); auto inverse_permute_cpy = Dense::create(exec, this->get_size()); exec->run(dense::make_inverse_column_permute( - make_temporary_clone(exec, inverse_permutation_indices).get(), this, + make_temporary_clone(exec, permutation_indices).get(), this, inverse_permute_cpy.get())); return std::move(inverse_permute_cpy); @@ -814,15 +881,14 @@ std::unique_ptr Dense::inverse_column_permute( template std::unique_ptr Dense::inverse_column_permute( - const Array *inverse_permutation_indices) const + const Array *permutation_indices) const { - GKO_ASSERT_EQ(inverse_permutation_indices->get_num_elems(), - this->get_size()[1]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); auto inverse_permute_cpy = Dense::create(exec, this->get_size()); exec->run(dense::make_inverse_column_permute( - make_temporary_clone(exec, inverse_permutation_indices).get(), this, + make_temporary_clone(exec, permutation_indices).get(), this, inverse_permute_cpy.get())); return std::move(inverse_permute_cpy); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 279c7001cc3..1799147d36e 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -143,6 +143,18 @@ namespace kernels { const matrix::Dense<_type> *orig, \ matrix::Dense<_type> *trans) +#define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void symm_permute(std::shared_ptr exec, \ + const Array<_itype> *permutation_indices, \ + const matrix::Dense<_vtype> *orig, \ + matrix::Dense<_vtype> *permuted) + +#define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ + void inv_symm_permute(std::shared_ptr exec, \ + const Array<_itype> *permutation_indices, \ + const matrix::Dense<_vtype> *orig, \ + matrix::Dense<_vtype> *permuted) + #define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(_vtype, _itype) \ void row_gather(std::shared_ptr exec, \ const Array<_itype> *gather_indices, \ @@ -238,6 +250,10 @@ namespace kernels { template \ GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType); \ + template \ GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(ValueType, IndexType); \ template \ GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType); \ diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index b90745fc8d9..e2ab3e23f95 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -1156,6 +1156,31 @@ void invert_permutation(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + inv_row_ptr_permute_kernel<<>>( + num_rows, perm, orig->get_const_row_ptrs(), permuted->get_row_ptrs()); + components::prefix_sum(exec, permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + inv_symm_permute_kernel + <<>>( + num_rows, perm, orig->get_const_row_ptrs(), + orig->get_const_col_idxs(), as_cuda_type(orig->get_const_values()), + permuted->get_row_ptrs(), permuted->get_col_idxs(), + as_cuda_type(permuted->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + template void row_permute(std::shared_ptr exec, const IndexType *perm, diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 41405402b7a..ae5be6b21ec 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -602,20 +602,55 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], default_block_size); + kernel::symm_permute<<>>( + orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), + as_cuda_type(orig->get_const_values()), orig->get_stride(), + as_cuda_type(permuted->get_values()), permuted->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], default_block_size); + kernel::inv_symm_permute<<>>( + orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), + as_cuda_type(orig->get_const_values()), orig->get_stride(), + as_cuda_type(permuted->get_values()), permuted->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, const Array *row_indices, const matrix::Dense *orig, matrix::Dense *row_gathered) { - constexpr auto block_size = default_block_size; auto out_num_rows = row_indices->get_num_elems(); - const dim3 grid_dim = - ceildiv(out_num_rows * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::row_gather<<>>( - out_num_rows, orig->get_size()[1], - as_cuda_type(row_indices->get_const_data()), + const auto num_blocks = + ceildiv(out_num_rows * orig->get_size()[1], default_block_size); + kernel::row_gather<<>>( + out_num_rows, orig->get_size()[1], row_indices->get_const_data(), as_cuda_type(orig->get_const_values()), orig->get_stride(), as_cuda_type(row_gathered->get_values()), row_gathered->get_stride()); } @@ -630,13 +665,11 @@ void column_permute(std::shared_ptr exec, const matrix::Dense *orig, matrix::Dense *column_permuted) { - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::column_permute<<>>( + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], default_block_size); + kernel::column_permute<<>>( orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), + permutation_indices->get_const_data(), as_cuda_type(orig->get_const_values()), orig->get_stride(), as_cuda_type(column_permuted->get_values()), column_permuted->get_stride()); @@ -652,13 +685,11 @@ void inverse_row_permute(std::shared_ptr exec, const matrix::Dense *orig, matrix::Dense *row_permuted) { - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::inverse_row_permute<<>>( + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], default_block_size); + kernel::inverse_row_permute<<>>( orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), + permutation_indices->get_const_data(), as_cuda_type(orig->get_const_values()), orig->get_stride(), as_cuda_type(row_permuted->get_values()), row_permuted->get_stride()); } @@ -673,13 +704,11 @@ void inverse_column_permute(std::shared_ptr exec, const matrix::Dense *orig, matrix::Dense *column_permuted) { - constexpr auto block_size = default_block_size; - const dim3 grid_dim = - ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - kernel::inverse_column_permute<<>>( + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], default_block_size); + kernel::inverse_column_permute<<>>( orig->get_size()[0], orig->get_size()[1], - as_cuda_type(permutation_indices->get_const_data()), + permutation_indices->get_const_data(), as_cuda_type(orig->get_const_values()), orig->get_stride(), as_cuda_type(column_permuted->get_values()), column_permuted->get_stride()); diff --git a/cuda/test/matrix/csr_kernels.cpp b/cuda/test/matrix/csr_kernels.cpp index 5cf33644c38..dd8dca752ef 100644 --- a/cuda/test/matrix/csr_kernels.cpp +++ b/cuda/test/matrix/csr_kernels.cpp @@ -738,6 +738,32 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = + gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Csr, IsRowPermutable) { set_up_apply_data(std::make_shared()); diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index 9e48a5ea807..a21af6a2a67 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -118,6 +118,7 @@ class Dense : public ::testing::Test { expected = gen_mtx(65, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); dx = Mtx::create(cuda); dx->copy_from(x.get()); dc_x = ComplexMtx::create(cuda); @@ -130,6 +131,8 @@ class Dense : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Mtx::create(cuda); dbeta->copy_from(beta.get()); + dsquare = Mtx::create(cuda); + dsquare->copy_from(square.get()); std::vector tmp(x->get_size()[0], 0); auto rng = std::default_random_engine{}; @@ -162,12 +165,14 @@ class Dense : public ::testing::Test { std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; + std::unique_ptr square; std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dc_x; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; std::unique_ptr cpermute_idxs; std::unique_ptr rgather_idxs; @@ -591,6 +596,30 @@ TEST_F(Dense, CanGatherRowsIntoDense) } +TEST_F(Dense, IsPermutable) +{ + set_up_apply_data(); + + auto permuted = square->permute(rpermute_idxs.get()); + auto dpermuted = dsquare->permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + +TEST_F(Dense, IsInversePermutable) +{ + set_up_apply_data(); + + auto permuted = square->inverse_permute(rpermute_idxs.get()); + auto dpermuted = dsquare->inverse_permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 7863539f7a2..9220939777f 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -262,6 +262,16 @@ void invert_permutation(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +template +void inv_symm_permute( + std::shared_ptr exec, const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + template void row_permute( std::shared_ptr exec, const IndexType *perm, diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 714d4f588b2..b9a759a1123 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -240,6 +240,26 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, const Array *gather_indices, diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index 6effd739123..8adf3f98a80 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -981,6 +981,33 @@ void invert_permutation(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto num_rows = orig->get_size()[0]; + auto count_num_blocks = ceildiv(num_rows, default_block_size); + hipLaunchKernelGGL(HIP_KERNEL_NAME(inv_row_ptr_permute_kernel), + count_num_blocks, default_block_size, 0, 0, num_rows, + perm, orig->get_const_row_ptrs(), + permuted->get_row_ptrs()); + components::prefix_sum(exec, permuted->get_row_ptrs(), num_rows + 1); + auto copy_num_blocks = + ceildiv(num_rows, default_block_size / config::warp_size); + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inv_symm_permute_kernel), + copy_num_blocks, default_block_size, 0, 0, num_rows, perm, + orig->get_const_row_ptrs(), orig->get_const_col_idxs(), + as_hip_type(orig->get_const_values()), permuted->get_row_ptrs(), + permuted->get_col_idxs(), as_hip_type(permuted->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + template void row_permute(std::shared_ptr exec, const IndexType *perm, const matrix::Csr *orig, diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index b62b3736e65..3f7f4942dc8 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -625,6 +625,47 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + constexpr auto block_size = default_block_size; + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); + hipLaunchKernelGGL( + kernel::symm_permute, num_blocks, block_size, 0, 0, orig->get_size()[0], + orig->get_size()[1], permutation_indices->get_const_data(), + as_hip_type(orig->get_const_values()), orig->get_stride(), + as_hip_type(permuted->get_values()), permuted->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + constexpr auto block_size = default_block_size; + const auto num_blocks = + ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); + hipLaunchKernelGGL(kernel::inv_symm_permute, num_blocks, block_size, 0, 0, + orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), + as_hip_type(orig->get_const_values()), + orig->get_stride(), as_hip_type(permuted->get_values()), + permuted->get_stride()); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, const Array *row_indices, @@ -633,13 +674,11 @@ void row_gather(std::shared_ptr exec, { constexpr auto block_size = default_block_size; auto out_num_rows = row_indices->get_num_elems(); - const dim3 grid_dim = + const auto num_blocks = ceildiv(out_num_rows * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; hipLaunchKernelGGL( - kernel::row_gather, dim3(grid_dim), dim3(block_dim), 0, 0, - out_num_rows, orig->get_size()[1], - as_hip_type(row_indices->get_const_data()), + kernel::row_gather, num_blocks, block_size, 0, 0, out_num_rows, + orig->get_size()[1], row_indices->get_const_data(), as_hip_type(orig->get_const_values()), orig->get_stride(), as_hip_type(row_gathered->get_values()), row_gathered->get_stride()); } @@ -655,16 +694,15 @@ void column_permute(std::shared_ptr exec, matrix::Dense *column_permuted) { constexpr auto block_size = default_block_size; - const dim3 grid_dim = + const auto num_blocks = ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::column_permute, dim3(grid_dim), dim3(block_dim), 0, - 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(column_permuted->get_values()), - column_permuted->get_stride()); + hipLaunchKernelGGL(kernel::column_permute, num_blocks, block_size, 0, 0, + orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), + as_hip_type(orig->get_const_values()), + orig->get_stride(), + as_hip_type(column_permuted->get_values()), + column_permuted->get_stride()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -678,13 +716,12 @@ void inverse_row_permute(std::shared_ptr exec, matrix::Dense *row_permuted) { constexpr auto block_size = default_block_size; - const dim3 grid_dim = + const auto num_blocks = ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; hipLaunchKernelGGL( - kernel::inverse_row_permute, dim3(grid_dim), - dim3(block_dim), 0, 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), + kernel::inverse_row_permute, num_blocks, block_size, 0, 0, + orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), as_hip_type(orig->get_const_values()), orig->get_stride(), as_hip_type(row_permuted->get_values()), row_permuted->get_stride()); } @@ -700,16 +737,15 @@ void inverse_column_permute(std::shared_ptr exec, matrix::Dense *column_permuted) { constexpr auto block_size = default_block_size; - const dim3 grid_dim = + const auto num_blocks = ceildiv(orig->get_size()[0] * orig->get_size()[1], block_size); - const dim3 block_dim{config::warp_size, 1, block_size / config::warp_size}; - hipLaunchKernelGGL( - kernel::inverse_column_permute, dim3(grid_dim), - dim3(block_dim), 0, 0, orig->get_size()[0], orig->get_size()[1], - as_hip_type(permutation_indices->get_const_data()), - as_hip_type(orig->get_const_values()), orig->get_stride(), - as_hip_type(column_permuted->get_values()), - column_permuted->get_stride()); + hipLaunchKernelGGL(kernel::inverse_column_permute, num_blocks, block_size, + 0, 0, orig->get_size()[0], orig->get_size()[1], + permutation_indices->get_const_data(), + as_hip_type(orig->get_const_values()), + orig->get_stride(), + as_hip_type(column_permuted->get_values()), + column_permuted->get_stride()); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/hip/test/matrix/csr_kernels.hip.cpp b/hip/test/matrix/csr_kernels.hip.cpp index 21a2e751714..36ad284ad71 100644 --- a/hip/test/matrix/csr_kernels.hip.cpp +++ b/hip/test/matrix/csr_kernels.hip.cpp @@ -732,6 +732,32 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(std::make_shared()); + + auto permuted = + gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Csr, IsRowPermutable) { set_up_apply_data(std::make_shared()); diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 210f4a879e3..dcd3ace06b9 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -117,6 +117,7 @@ class Dense : public ::testing::Test { expected = gen_mtx(65, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); dx = Mtx::create(hip); dx->copy_from(x.get()); dy = Mtx::create(hip); @@ -127,6 +128,8 @@ class Dense : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Mtx::create(hip); dbeta->copy_from(beta.get()); + dsquare = Mtx::create(hip); + dsquare->copy_from(square.get()); std::vector tmp(x->get_size()[0], 0); auto rng = std::default_random_engine{}; @@ -158,11 +161,13 @@ class Dense : public ::testing::Test { std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; + std::unique_ptr square; std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; std::unique_ptr cpermute_idxs; std::unique_ptr rgather_idxs; @@ -574,6 +579,30 @@ TEST_F(Dense, CanGatherRowsIntoDense) } +TEST_F(Dense, IsPermutable) +{ + set_up_apply_data(); + + auto permuted = square->permute(rpermute_idxs.get()); + auto dpermuted = dsquare->permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + +TEST_F(Dense, IsInversePermutable) +{ + set_up_apply_data(); + + auto permuted = square->inverse_permute(rpermute_idxs.get()); + auto dpermuted = dsquare->inverse_permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 5c2c9a34fa1..59c7a02586d 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -440,18 +440,19 @@ class Transposable { * Linear operators which support permutation should implement the * Permutable interface. * - * It provides four functionalities, the row permute, the - * column permute, the inverse row permute and the inverse column permute. - * - * The row permute returns the permutation of the linear operator after - * permuting the rows of the linear operator. For example, if for a matrix A, - * the permuted matrix A' and the permutation array perm, the row i of the - * matrix A is the row perm[i] in the matrix A'. And similarly, for the inverse - * permutation, the row i in the matrix A' is the row perm[i] in the matrix A. - * - * The column permute returns the permutation of the linear operator after - * permuting the columns of the linear operator. The definitions of permute and - * inverse permute for the row_permute hold here as well. + * It provides functions to permute the rows and columns of a LinOp, + * independently or symmetrically, and with a regular or inverted permutation. + * + * After a regular row permutation with permutation array `perm` the row `i` in + * the output LinOp contains the row `perm[i]` from the input LinOp. + * After an inverse row permutation, the row `perm[i]` in the output LinOp + * contains the row `i` from the input LinOp. + * Equivalently, after a column permutation, the output stores in column `i` + * the column `perm[i]` from the input, and an inverse column permutation + * stores in column `perm[i]` the column `i` from the input. + * A symmetric permutation is functionally equivalent to calling + * `as(A->row_permute(perm))->column_permute(perm)`, but the + * implementation can provide better performance due to kernel fusion. * * Example: Permuting a Csr matrix: * ------------------------------------ @@ -469,12 +470,49 @@ class Permutable { public: virtual ~Permutable() = default; + /** + * Returns a LinOp representing the symmetric row and column permutation of + * the Permutable object. + * In the resulting LinOp, the entry at location `(i,j)` contains the input + * value `(perm[i],perm[j])`. + * + * @param permutation_indices the array of indices containing the + * permutation order. + * + * @return a pointer to the new permuted object + */ + virtual std::unique_ptr permute( + const Array *permutation_indices) const + { + return as(this->row_permute(permutation_indices)) + ->column_permute(permutation_indices); + }; + + /** + * Returns a LinOp representing the symmetric inverse row and column + * permutation of the Permutable object. + * In the resulting LinOp, the entry at location `(perm[i],perm[j])` + * contains the input value `(i,j)`. + * + * @param permutation_indices the array of indices containing the + * permutation order. + * + * @return a pointer to the new permuted object + */ + virtual std::unique_ptr inverse_permute( + const Array *permutation_indices) const + { + return as(this->inverse_row_permute(permutation_indices)) + ->inverse_column_permute(permutation_indices); + }; + /** * Returns a LinOp representing the row permutation of the Permutable * object. + * In the resulting LinOp, the row `i` contains the input row `perm[i]`. * - * @param permutation_indices the array of indices contaning the - * permutation order. + * @param permutation_indices the array of indices containing the + * permutation order. * * @return a pointer to the new permuted object */ @@ -484,9 +522,11 @@ class Permutable { /** * Returns a LinOp representing the column permutation of the Permutable * object. + * In the resulting LinOp, the column `i` contains the input column + * `perm[i]`. * - * @param permutation_indices the array of indices contaning the - * permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new column permuted object */ @@ -496,26 +536,29 @@ class Permutable { /** * Returns a LinOp representing the row permutation of the inverse permuted * object. + * In the resulting LinOp, the row `perm[i]` contains the input row `i`. * - * @param inverse_permutation_indices the array of indices contaning the - * inverse permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const = 0; + const Array *permutation_indices) const = 0; /** * Returns a LinOp representing the row permutation of the inverse permuted * object. + * In the resulting LinOp, the column `perm[i]` contains the input column + * `i`. * - * @param inverse_permutation_indices the array of indices contaning the - * inverse permutation order. + * @param permutation_indices the array of indices containing the + * permutation order `perm`. * * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const = 0; + const Array *permutation_indices) const = 0; }; diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index bb54cabdd7a..f75cc80e1a1 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -694,6 +694,12 @@ class Csr : public EnableLinOp>, std::unique_ptr conj_transpose() const override; + std::unique_ptr permute( + const Array *permutation_indices) const override; + + std::unique_ptr inverse_permute( + const Array *inverse_permutation_indices) const override; + std::unique_ptr row_permute( const Array *permutation_indices) const override; diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index ad689c8ad1f..a7fc245cd71 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -226,6 +226,18 @@ class Dense std::unique_ptr conj_transpose() const override; + std::unique_ptr permute( + const Array *permutation_indices) const override; + + std::unique_ptr permute( + const Array *permutation_indices) const override; + + std::unique_ptr inverse_permute( + const Array *permutation_indices) const override; + + std::unique_ptr inverse_permute( + const Array *permutation_indices) const override; + std::unique_ptr row_permute( const Array *permutation_indices) const override; @@ -276,16 +288,16 @@ class Dense const Array *permutation_indices) const override; std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const override; + const Array *permutation_indices) const override; std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const override; + const Array *permutation_indices) const override; std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const override; + const Array *permutation_indices) const override; std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const override; + const Array *permutation_indices) const override; std::unique_ptr> extract_diagonal() const override; diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 0f681fa409b..284bc380544 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -612,6 +612,45 @@ void invert_permutation(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto p_row_ptrs = permuted->get_row_ptrs(); + auto p_col_idxs = permuted->get_col_idxs(); + auto p_vals = permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum(exec, p_row_ptrs, num_rows + 1); +#pragma omp parallel for + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = p_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + for (IndexType i = 0; i < row_size; ++i) { + p_col_idxs[dst_begin + i] = perm[in_col_idxs[src_begin + i]]; + p_vals[dst_begin + i] = in_vals[src_begin + i]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + template void row_permute(std::shared_ptr exec, const IndexType *perm, const matrix::Csr *orig, diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 7f49b3d6c8b..a14f45ba033 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -679,6 +679,46 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + auto perm = permutation_indices->get_const_data(); + auto size = orig->get_size()[0]; +#pragma omp parallel for + for (size_type i = 0; i < size; ++i) { + for (size_type j = 0; j < size; ++j) { + permuted->at(i, j) = orig->at(perm[i], perm[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + auto perm = permutation_indices->get_const_data(); + auto size = orig->get_size()[0]; +#pragma omp parallel for + for (size_type i = 0; i < size; ++i) { + for (size_type j = 0; j < size; ++j) { + permuted->at(perm[i], perm[j]) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, const Array *row_indices, diff --git a/omp/test/matrix/csr_kernels.cpp b/omp/test/matrix/csr_kernels.cpp index 439d8271a09..2419b5c1a41 100644 --- a/omp/test/matrix/csr_kernels.cpp +++ b/omp/test/matrix/csr_kernels.cpp @@ -465,6 +465,32 @@ TEST_F(Csr, MoveToHybridIsEquivalentToRef) } +TEST_F(Csr, IsPermutable) +{ + set_up_apply_data(); + + auto permuted = gko::as(square_mtx->permute(rpermute_idxs.get())); + auto dpermuted = gko::as(square_dmtx->permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + +TEST_F(Csr, IsInversePermutable) +{ + set_up_apply_data(); + + auto permuted = + gko::as(square_mtx->inverse_permute(rpermute_idxs.get())); + auto dpermuted = + gko::as(square_dmtx->inverse_permute(rpermute_idxs.get())); + + GKO_ASSERT_MTX_EQ_SPARSITY(permuted, dpermuted); + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Csr, IsRowPermutable) { set_up_apply_data(); diff --git a/omp/test/matrix/dense_kernels.cpp b/omp/test/matrix/dense_kernels.cpp index 2ee7ea96075..4f36313c8ad 100644 --- a/omp/test/matrix/dense_kernels.cpp +++ b/omp/test/matrix/dense_kernels.cpp @@ -130,6 +130,7 @@ class Dense : public ::testing::Test { expected = gen_mtx(40, 35); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); + square = gen_mtx(x->get_size()[0], x->get_size()[0]); dx = Mtx::create(omp); dx->copy_from(x.get()); dc_x = ComplexMtx::create(omp); @@ -142,6 +143,8 @@ class Dense : public ::testing::Test { dalpha->copy_from(alpha.get()); dbeta = Mtx::create(omp); dbeta->copy_from(beta.get()); + dsquare = Mtx::create(omp); + dsquare->copy_from(square.get()); std::vector tmp(x->get_size()[0], 0); auto rng = std::default_random_engine{}; @@ -174,12 +177,14 @@ class Dense : public ::testing::Test { std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr expected; + std::unique_ptr square; std::unique_ptr dresult; std::unique_ptr dx; std::unique_ptr dc_x; std::unique_ptr dy; std::unique_ptr dalpha; std::unique_ptr dbeta; + std::unique_ptr dsquare; std::unique_ptr rpermute_idxs; std::unique_ptr cpermute_idxs; std::unique_ptr rgather_idxs; @@ -710,6 +715,30 @@ TEST_F(Dense, CanGatherRowsIntoDense) } +TEST_F(Dense, IsPermutable) +{ + set_up_apply_data(); + + auto permuted = square->permute(rpermute_idxs.get()); + auto dpermuted = dsquare->permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + +TEST_F(Dense, IsInversePermutable) +{ + set_up_apply_data(); + + auto permuted = square->inverse_permute(rpermute_idxs.get()); + auto dpermuted = dsquare->inverse_permute(rpermute_idxs.get()); + + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index 9cb739a82b9..d4d5a02e473 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -702,6 +702,43 @@ void invert_permutation(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); +template +void inv_symm_permute(std::shared_ptr exec, + const IndexType *perm, + const matrix::Csr *orig, + matrix::Csr *permuted) +{ + auto in_row_ptrs = orig->get_const_row_ptrs(); + auto in_col_idxs = orig->get_const_col_idxs(); + auto in_vals = orig->get_const_values(); + auto p_row_ptrs = permuted->get_row_ptrs(); + auto p_col_idxs = permuted->get_col_idxs(); + auto p_vals = permuted->get_values(); + size_type num_rows = orig->get_size()[0]; + + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + p_row_ptrs[dst_row] = in_row_ptrs[src_row + 1] - in_row_ptrs[src_row]; + } + components::prefix_sum(exec, p_row_ptrs, num_rows + 1); + for (size_type row = 0; row < num_rows; ++row) { + auto src_row = row; + auto dst_row = perm[row]; + auto src_begin = in_row_ptrs[src_row]; + auto dst_begin = p_row_ptrs[dst_row]; + auto row_size = in_row_ptrs[src_row + 1] - src_begin; + for (IndexType i = 0; i < row_size; ++i) { + p_col_idxs[dst_begin + i] = perm[in_col_idxs[src_begin + i]]; + p_vals[dst_begin + i] = in_vals[src_begin + i]; + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); + + template void row_permute(std::shared_ptr exec, const IndexType *perm, diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index ab020a84a3d..16443c0bbb5 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -599,6 +599,44 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + auto perm = permutation_indices->get_const_data(); + auto size = orig->get_size()[0]; + for (size_type i = 0; i < size; ++i) { + for (size_type j = 0; j < size; ++j) { + permuted->at(i, j) = orig->at(perm[i], perm[j]); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL); + + +template +void inv_symm_permute(std::shared_ptr exec, + const Array *permutation_indices, + const matrix::Dense *orig, + matrix::Dense *permuted) +{ + auto perm = permutation_indices->get_const_data(); + auto size = orig->get_size()[0]; + for (size_type i = 0; i < size; ++i) { + for (size_type j = 0; j < size; ++j) { + permuted->at(perm[i], perm[j]) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); + + template void row_gather(std::shared_ptr exec, const Array *row_indices, diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 14f153ff8f3..b963508eb45 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -1067,6 +1067,46 @@ TYPED_TEST(Csr, NonSquareMtxIsTransposable) } +TYPED_TEST(Csr, SquareMatrixIsPermutable) +{ + using Csr = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + // clang-format off + auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, + {0.0, 5.0, 0.0}, + {0.0, 1.5, 2.0}}, this->exec); + // clang-format on + gko::Array permute_idxs{this->exec, {1, 2, 0}}; + + auto ref_permute_csr = + gko::as(gko::as(p_mtx->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permute_csr = gko::as(p_mtx->permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permute_csr, permute_csr, 0.0); +} + + +TYPED_TEST(Csr, SquareMatrixIsInversePermutable) +{ + using Csr = typename TestFixture::Mtx; + using index_type = typename TestFixture::index_type; + // clang-format off + auto p_mtx = gko::initialize({{1.0, 3.0, 2.0}, + {0.0, 5.0, 0.0}, + {0.0, 1.5, 2.0}}, this->exec); + // clang-format on + gko::Array permute_idxs{this->exec, {1, 2, 0}}; + + auto ref_permute_csr = + gko::as(gko::as(p_mtx->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permute_csr = gko::as(p_mtx->inverse_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permute_csr, permute_csr, 0.0); +} + + TYPED_TEST(Csr, SquareMatrixIsRowPermutable) { using Csr = typename TestFixture::Mtx; diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 6f64ab26289..3022ece5124 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -1804,6 +1804,86 @@ TYPED_TEST(Dense, SquareMatrixCanGatherRowsIntoDense64) } +TYPED_TEST(Dense, SquareMatrixIsPermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); +} + + +TYPED_TEST(Dense, SquareMatrixIsInversePermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); +} + + +TYPED_TEST(Dense, SquareMatrixIsPermutable64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); +} + + +TYPED_TEST(Dense, SquareMatrixIsInversePermutable64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); +} + + TYPED_TEST(Dense, SquareMatrixIsRowPermutable) { // clang-format off