diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 11e66475301..8a4f31dce4c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -327,40 +327,6 @@ build/cuda92/gcc/all/release/shared: BUILD_TYPE: "Release" CUDA_ARCH: 35 -# Make sure that our jobs run when HWLOC is -# forcibly switched off -build/cuda92/intel/cuda/release/static: - <<: *default_build - extends: - - .full_test_condition - - .use_gko-cuda92-gnu7-llvm50-intel2017 - variables: - <<: *default_variables - C_COMPILER: "icc" - CXX_COMPILER: "icpc" - BUILD_OMP: "ON" - BUILD_CUDA: "ON" - BUILD_HWLOC: "OFF" - BUILD_TYPE: "Release" - BUILD_SHARED_LIBS: "OFF" - CUDA_ARCH: 35 - -# Build CUDA NVIDIA without omp -build/cuda92/intel/cuda_wo_omp/release/shared: - <<: *default_build - extends: - - .quick_test_condition - - .use_gko-cuda92-gnu7-llvm50-intel2017 - variables: - <<: *default_variables - C_COMPILER: "icc" - CXX_COMPILER: "icpc" - BUILD_CUDA: "ON" - BUILD_HIP: "ON" - BUILD_HWLOC: "OFF" - BUILD_TYPE: "Release" - CUDA_ARCH: 35 - # cuda 10.0 and friends # Make sure that our jobs run when using self-installed # third-party HWLOC. 
@@ -378,6 +344,8 @@ build/cuda100/gcc/all/debug/shared: FAST_TESTS: "ON" CUDA_ARCH: 35 +# Make sure that our jobs run when HWLOC is +# forcibly switched off build/cuda100/clang/all/release/static: <<: *default_build extends: @@ -390,6 +358,7 @@ build/cuda100/clang/all/release/static: BUILD_OMP: "ON" BUILD_CUDA: "ON" BUILD_HIP: "ON" + BUILD_HWLOC: "OFF" BUILD_TYPE: "Release" BUILD_SHARED_LIBS: "OFF" CUDA_ARCH: 35 @@ -408,6 +377,22 @@ build/cuda100/intel/cuda/release/shared: BUILD_TYPE: "Release" CUDA_ARCH: 35 +# Build CUDA NVIDIA without omp +build/cuda100/intel/cuda_wo_omp/release/shared: + <<: *default_build + extends: + - .full_test_condition + - .use_gko-cuda100-gnu7-llvm60-intel2018 + variables: + <<: *default_variables + C_COMPILER: "icc" + CXX_COMPILER: "icpc" + BUILD_CUDA: "ON" + BUILD_HIP: "ON" + BUILD_HWLOC: "OFF" + BUILD_TYPE: "Release" + CUDA_ARCH: 35 + # cuda 10.1 and friends build/cuda101/gcc/all/debug/shared: <<: *default_build diff --git a/README.md b/README.md index 47715cccf75..9f5cffd212b 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ For Ginkgo core library: * C++14 compliant compiler, one of: * _gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_ * _clang 3.9+_ - * _Intel compiler 2017+_ - * _Apple LLVM 8.0+_ (__TODO__: verify) + * _Intel compiler 2018+_ + * _Apple LLVM 8.0+_ The Ginkgo CUDA module has the following __additional__ requirements: diff --git a/common/matrix/dense_kernels.hpp.inc b/common/matrix/dense_kernels.hpp.inc index f1ab299698f..8bbba33fd6d 100644 --- a/common/matrix/dense_kernels.hpp.inc +++ b/common/matrix/dense_kernels.hpp.inc @@ -33,6 +33,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace kernel { +template +__global__ __launch_bounds__(default_block_size) void strided_copy( + size_type num_rows, size_type num_cols, size_type in_stride, + size_type out_stride, const InValueType *__restrict__ input, + OutValueType *__restrict__ output) +{ + const auto global_id = thread::get_thread_id_flat(); + const auto row_id = global_id / num_cols; + const auto col_id = global_id % num_cols; + if (row_id < num_rows) { + output[row_id * out_stride + col_id] = + static_cast(input[row_id * in_stride + col_id]); + } +} + + template __global__ __launch_bounds__(default_block_size) void strided_fill( size_type num_rows, size_type num_cols, size_type stride, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 810a3fab4e4..c4244c8317c 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -133,6 +133,12 @@ GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType) +GKO_NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + template GKO_DECLARE_DENSE_FILL_KERNEL(ValueType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 02edd22ce78..09f19e9b360 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -63,6 +63,7 @@ namespace dense { GKO_REGISTER_OPERATION(simple_apply, dense::simple_apply); GKO_REGISTER_OPERATION(apply, dense::apply); +GKO_REGISTER_OPERATION(copy, dense::copy); GKO_REGISTER_OPERATION(fill, dense::fill); GKO_REGISTER_OPERATION(scale, dense::scale); GKO_REGISTER_OPERATION(add_scaled, dense::add_scaled); @@ -327,13 +328,50 @@ void Dense::compute_norm2_impl(LinOp *result) const } +template +void Dense::convert_to(Dense *result) const +{ + if (this->get_size() && 
result->get_size() == this->get_size()) { + // we need to create an executor-local clone of the target data, that + // will be copied back later. + auto exec = this->get_executor(); + auto result_array = make_temporary_output_clone(exec, &result->values_); + // create a (value, not pointer to avoid allocation overhead) view + // matrix on the array to avoid special-casing cross-executor copies + auto tmp_result = + Dense{exec, result->get_size(), + Array::view(exec, result_array->get_num_elems(), + result_array->get_data()), + result->get_stride()}; + exec->run(dense::make_copy(this, &tmp_result)); + } else { + result->values_ = this->values_; + result->stride_ = this->stride_; + result->set_size(this->get_size()); + } +} + + +template +void Dense::move_to(Dense *result) +{ + this->convert_to(result); +} + + template void Dense::convert_to( Dense> *result) const { - result->values_ = this->values_; - result->stride_ = this->stride_; - result->set_size(this->get_size()); + if (result->get_size() == this->get_size()) { + auto exec = this->get_executor(); + exec->run(dense::make_copy( + this, make_temporary_output_clone(exec, result).get())); + } else { + result->values_ = this->values_; + result->stride_ = this->stride_; + result->set_size(this->get_size()); + } } @@ -637,107 +675,229 @@ void Dense::write(mat_data32 &data) const template std::unique_ptr Dense::transpose() const { - auto exec = this->get_executor(); - auto trans_cpy = Dense::create(exec, gko::transpose(this->get_size())); + auto result = + Dense::create(this->get_executor(), gko::transpose(this->get_size())); + this->transpose(result.get()); + return result; +} - exec->run(dense::make_transpose(this, trans_cpy.get())); - return std::move(trans_cpy); +template +std::unique_ptr Dense::conj_transpose() const +{ + auto result = + Dense::create(this->get_executor(), gko::transpose(this->get_size())); + this->conj_transpose(result.get()); + return result; } template -std::unique_ptr Dense::conj_transpose() 
const +void Dense::transpose(Dense *output) const { + GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); - auto trans_cpy = Dense::create(exec, gko::transpose(this->get_size())); + exec->run(dense::make_transpose( + this, make_temporary_output_clone(exec, output).get())); +} - exec->run(dense::make_conj_transpose(this, trans_cpy.get())); - return std::move(trans_cpy); + +template +void Dense::conj_transpose(Dense *output) const +{ + GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); + auto exec = this->get_executor(); + exec->run(dense::make_conj_transpose( + this, make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::permute( - const Array *permutation_indices) const +template +void Dense::permute_impl(const Array *permutation_indices, + Dense *output) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); exec->run(dense::make_symm_permute( make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - - return std::move(permute_cpy); + make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::permute( - const Array *permutation_indices) const +template +void Dense::inverse_permute_impl( + const Array *permutation_indices, Dense *output) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_symm_permute( + exec->run(dense::make_inv_symm_permute( make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - - return std::move(permute_cpy); + 
make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr Dense::inverse_permute( - const Array *permutation_indices) const +template +void Dense::row_permute_impl( + const Array *permutation_indices, Dense *output) const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_inv_symm_permute( + exec->run(dense::make_row_gather( make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); + make_temporary_output_clone(exec, output).get())); +} - return std::move(permute_cpy); + +template +template +void Dense::row_gather_impl(const Array *row_indices, + Dense *row_gathered) const +{ + auto exec = this->get_executor(); + dim<2> expected_dim{row_indices->get_num_elems(), this->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_gathered); + + exec->run(dense::make_row_gather( + make_temporary_clone(exec, row_indices).get(), this, + make_temporary_output_clone(exec, row_gathered).get())); } template -std::unique_ptr Dense::inverse_permute( - const Array *permutation_indices) const +template +void Dense::column_permute_impl( + const Array *permutation_indices, Dense *output) const { - GKO_ASSERT_IS_SQUARE_MATRIX(this); - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_inv_symm_permute( + exec->run(dense::make_column_permute( make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - - return std::move(permute_cpy); + make_temporary_output_clone(exec, output).get())); } template -std::unique_ptr 
Dense::row_permute( - const Array *permutation_indices) const +template +void Dense::inverse_row_permute_impl( + const Array *permutation_indices, Dense *output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); - exec->run(dense::make_row_gather( + exec->run(dense::make_inverse_row_permute( make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); + make_temporary_output_clone(exec, output).get())); +} + - return std::move(permute_cpy); +template +template +void Dense::inverse_column_permute_impl( + const Array *permutation_indices, Dense *output) const +{ + GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); + auto exec = this->get_executor(); + + exec->run(dense::make_inverse_column_permute( + make_temporary_clone(exec, permutation_indices).get(), this, + make_temporary_output_clone(exec, output).get())); +} + + +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::permute(const Array *permutation_indices, + Dense *output) const +{ + this->permute_impl(permutation_indices, output); +} + + +template +void Dense::permute(const Array *permutation_indices, + Dense *output) const +{ + this->permute_impl(permutation_indices, output); +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + auto result = 
Dense::create(this->get_executor(), this->get_size()); + this->inverse_permute(permutation_indices, result.get()); + return result; +} + + +template +std::unique_ptr Dense::inverse_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::inverse_permute(const Array *permutation_indices, + Dense *output) const +{ + this->inverse_permute_impl(permutation_indices, output); +} + + +template +void Dense::inverse_permute(const Array *permutation_indices, + Dense *output) const +{ + this->inverse_permute_impl(permutation_indices, output); +} + + +template +std::unique_ptr Dense::row_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->row_permute(permutation_indices, result.get()); + return result; } @@ -745,15 +905,25 @@ template std::unique_ptr Dense::row_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->row_permute(permutation_indices, result.get()); + return result; +} + + +template +void Dense::row_permute(const Array *permutation_indices, + Dense *output) const +{ + this->row_permute_impl(permutation_indices, output); +} - exec->run(dense::make_row_gather( - make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - return std::move(permute_cpy); +template +void Dense::row_permute(const Array *permutation_indices, + Dense *output) const +{ + this->row_permute_impl(permutation_indices, output); } @@ -763,12 +933,9 @@ std::unique_ptr> Dense::row_gather( { auto exec = this->get_executor(); dim<2> out_dim{row_indices->get_num_elems(), 
this->get_size()[1]}; - auto row_gathered = Dense::create(exec, out_dim); - - exec->run( - dense::make_row_gather(make_temporary_clone(exec, row_indices).get(), - this, row_gathered.get())); - return row_gathered; + auto result = Dense::create(exec, out_dim); + this->row_gather(row_indices, result.get()); + return result; } @@ -778,12 +945,9 @@ std::unique_ptr> Dense::row_gather( { auto exec = this->get_executor(); dim<2> out_dim{row_indices->get_num_elems(), this->get_size()[1]}; - auto row_gathered = Dense::create(exec, out_dim); - - exec->run( - dense::make_row_gather(make_temporary_clone(exec, row_indices).get(), - this, row_gathered.get())); - return row_gathered; + auto result = Dense::create(exec, out_dim); + this->row_gather(row_indices, result.get()); + return result; } @@ -791,13 +955,7 @@ template void Dense::row_gather(const Array *row_indices, Dense *row_gathered) const { - auto exec = this->get_executor(); - dim<2> expected_dim{row_indices->get_num_elems(), this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_gathered); - - exec->run(dense::make_row_gather( - make_temporary_clone(exec, row_indices).get(), this, - make_temporary_clone(exec, row_gathered).get())); + this->row_gather_impl(row_indices, row_gathered); } @@ -805,14 +963,7 @@ template void Dense::row_gather(const Array *row_indices, Dense *row_gathered) const { - dim<2> expected_dim{row_indices->get_num_elems(), this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, row_gathered); - - auto exec = this->get_executor(); - - this->get_executor()->run(dense::make_row_gather( - make_temporary_clone(exec, row_indices).get(), this, - make_temporary_clone(exec, row_gathered).get())); + this->row_gather_impl(row_indices, row_gathered); } @@ -820,15 +971,9 @@ template std::unique_ptr Dense::column_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto 
permute_cpy = Dense::create(exec, this->get_size()); - - exec->run(dense::make_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - - return std::move(permute_cpy); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->column_permute(permutation_indices, result.get()); + return result; } @@ -836,31 +981,35 @@ template std::unique_ptr Dense::column_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto permute_cpy = Dense::create(exec, this->get_size()); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->column_permute(permutation_indices, result.get()); + return result; +} - exec->run(dense::make_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - permute_cpy.get())); - return std::move(permute_cpy); +template +void Dense::column_permute(const Array *permutation_indices, + Dense *output) const +{ + this->column_permute_impl(permutation_indices, output); } template -std::unique_ptr Dense::inverse_row_permute( - const Array *permutation_indices) const +void Dense::column_permute(const Array *permutation_indices, + Dense *output) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + this->column_permute_impl(permutation_indices, output); +} - exec->run(dense::make_inverse_row_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +std::unique_ptr Dense::inverse_row_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_row_permute(permutation_indices, result.get()); + return result; } @@ -868,31 +1017,35 @@ 
template std::unique_ptr Dense::inverse_row_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_row_permute(permutation_indices, result.get()); + return result; +} - exec->run(dense::make_inverse_row_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +void Dense::inverse_row_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_row_permute_impl(permutation_indices, output); } template -std::unique_ptr Dense::inverse_column_permute( - const Array *permutation_indices) const +void Dense::inverse_row_permute( + const Array *permutation_indices, Dense *output) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + this->inverse_row_permute_impl(permutation_indices, output); +} - exec->run(dense::make_inverse_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +std::unique_ptr Dense::inverse_column_permute( + const Array *permutation_indices) const +{ + auto result = Dense::create(this->get_executor(), this->get_size()); + this->inverse_column_permute(permutation_indices, result.get()); + return result; } @@ -900,26 +1053,46 @@ template std::unique_ptr Dense::inverse_column_permute( const Array *permutation_indices) const { - GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); - auto exec = this->get_executor(); - auto inverse_permute_cpy = Dense::create(exec, this->get_size()); + auto result = 
Dense::create(this->get_executor(), this->get_size()); + this->inverse_column_permute(permutation_indices, result.get()); + return result; +} - exec->run(dense::make_inverse_column_permute( - make_temporary_clone(exec, permutation_indices).get(), this, - inverse_permute_cpy.get())); - return std::move(inverse_permute_cpy); +template +void Dense::inverse_column_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_column_permute_impl(permutation_indices, output); } template -std::unique_ptr> Dense::extract_diagonal() const +void Dense::inverse_column_permute( + const Array *permutation_indices, Dense *output) const +{ + this->inverse_column_permute_impl(permutation_indices, output); +} + + +template +void Dense::extract_diagonal(Diagonal *output) const { auto exec = this->get_executor(); + const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); + GKO_ASSERT_EQ(output->get_size()[0], diag_size); + + exec->run(dense::make_extract_diagonal( + this, make_temporary_output_clone(exec, output).get())); +} + +template +std::unique_ptr> Dense::extract_diagonal() const +{ const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); - auto diag = Diagonal::create(exec, diag_size); - exec->run(dense::make_extract_diagonal(this, lend(diag))); + auto diag = Diagonal::create(this->get_executor(), diag_size); + this->extract_diagonal(diag.get()); return diag; } @@ -927,9 +1100,7 @@ std::unique_ptr> Dense::extract_diagonal() const template void Dense::compute_absolute_inplace() { - auto exec = this->get_executor(); - - exec->run(dense::make_inplace_absolute_dense(this)); + this->get_executor()->run(dense::make_inplace_absolute_dense(this)); } @@ -937,92 +1108,88 @@ template std::unique_ptr::absolute_type> Dense::compute_absolute() const { - auto exec = this->get_executor(); - // do not inherit the stride - auto abs_dense = absolute_type::create(exec, this->get_size()); - - 
exec->run(dense::make_outplace_absolute_dense(this, abs_dense.get())); - - return abs_dense; + auto result = absolute_type::create(this->get_executor(), this->get_size()); + this->compute_absolute(result.get()); + return result; } template -std::unique_ptr::complex_type> -Dense::make_complex() const +void Dense::compute_absolute( + Dense::absolute_type *output) const { + GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); - auto complex_dense = complex_type::create(exec, this->get_size()); + exec->run(dense::make_outplace_absolute_dense( + this, make_temporary_output_clone(exec, output).get())); +} - exec->run(dense::make_make_complex(this, complex_dense.get())); - return complex_dense; +template +std::unique_ptr::complex_type> +Dense::make_complex() const +{ + auto result = complex_type::create(this->get_executor(), this->get_size()); + this->make_complex(result.get()); + return result; } template -void Dense::make_complex(Dense> *result) const +void Dense::make_complex( + typename Dense::complex_type *result) const { - auto exec = this->get_executor(); - GKO_ASSERT_EQUAL_DIMENSIONS(this, result); + auto exec = this->get_executor(); exec->run(dense::make_make_complex( - this, make_temporary_clone(exec, result).get())); + this, make_temporary_output_clone(exec, result).get())); } template -std::unique_ptr::absolute_type> +std::unique_ptr::real_type> Dense::get_real() const { - auto exec = this->get_executor(); - - auto real_dense = absolute_type::create(exec, this->get_size()); - - exec->run(dense::make_get_real(this, real_dense.get())); - - return real_dense; + auto result = real_type::create(this->get_executor(), this->get_size()); + this->get_real(result.get()); + return result; } template -void Dense::get_real(Dense> *result) const +void Dense::get_real( + typename Dense::real_type *result) const { - auto exec = this->get_executor(); - GKO_ASSERT_EQUAL_DIMENSIONS(this, result); + auto exec = this->get_executor(); - exec->run( - 
dense::make_get_real(this, make_temporary_clone(exec, result).get())); + exec->run(dense::make_get_real( + this, make_temporary_output_clone(exec, result).get())); } template -std::unique_ptr::absolute_type> +std::unique_ptr::real_type> Dense::get_imag() const { - auto exec = this->get_executor(); - - auto imag_dense = absolute_type::create(exec, this->get_size()); - - exec->run(dense::make_get_imag(this, imag_dense.get())); - - return imag_dense; + auto result = real_type::create(this->get_executor(), this->get_size()); + this->get_imag(result.get()); + return result; } template -void Dense::get_imag(Dense> *result) const +void Dense::get_imag( + typename Dense::real_type *result) const { - auto exec = this->get_executor(); - GKO_ASSERT_EQUAL_DIMENSIONS(this, result); + auto exec = this->get_executor(); - exec->run( - dense::make_get_imag(this, make_temporary_clone(exec, result).get())); + exec->run(dense::make_get_imag( + this, make_temporary_output_clone(exec, result).get())); } diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 1cbe7f1001a..6b1e57f0a27 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -57,6 +57,11 @@ namespace kernels { const matrix::Dense<_type> *a, const matrix::Dense<_type> *b, \ const matrix::Dense<_type> *beta, matrix::Dense<_type> *c) +#define GKO_DECLARE_DENSE_COPY_KERNEL(_intype, _outtype) \ + void copy(std::shared_ptr exec, \ + const matrix::Dense<_intype> *input, \ + matrix::Dense<_outtype> *output) + #define GKO_DECLARE_DENSE_FILL_KERNEL(_type) \ void fill(std::shared_ptr exec, \ matrix::Dense<_type> *mat, _type value) @@ -225,6 +230,8 @@ namespace kernels { GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_APPLY_KERNEL(ValueType); \ + template \ + GKO_DECLARE_DENSE_COPY_KERNEL(InValueType, OutValueType); \ template \ GKO_DECLARE_DENSE_FILL_KERNEL(ValueType); \ template \ diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index 
91c5e8867b9..708bfcf5dad 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -272,6 +272,14 @@ TYPED_TEST(Array, CanCreateTemporaryCloneOnSameExecutor) } +TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnSameExecutor) +{ + auto tmp_clone = make_temporary_output_clone(this->exec, &this->x); + + ASSERT_EQ(tmp_clone.get(), &this->x); +} + + // For tests between different memory, check cuda/test/base/array.cu TYPED_TEST(Array, DoesNotCreateATemporaryCloneBetweenSameMemory) { @@ -299,6 +307,27 @@ TYPED_TEST(Array, DoesNotCopyBackTemporaryCloneBetweenSameMemory) } +TYPED_TEST(Array, CanCreateTemporaryOutputCloneOnDifferentExecutors) +{ + auto other = gko::OmpExecutor::create(); + + { + auto tmp_clone = make_temporary_output_clone(other, &this->x); + tmp_clone->get_data()[0] = 4; + tmp_clone->get_data()[1] = 5; + + // there is no reliable way to check the memory is uninitialized + ASSERT_EQ(tmp_clone->get_num_elems(), this->x.get_num_elems()); + ASSERT_EQ(tmp_clone->get_executor(), other); + ASSERT_EQ(this->x.get_executor(), this->exec); + ASSERT_EQ(this->x.get_data()[0], TypeParam{5}); + ASSERT_EQ(this->x.get_data()[1], TypeParam{2}); + } + ASSERT_EQ(this->x.get_data()[0], TypeParam{4}); + ASSERT_EQ(this->x.get_data()[1], TypeParam{5}); +} + + TYPED_TEST(Array, CanBeCleared) { this->x.clear(); diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp index b66a1260a3a..6d8985e1dff 100644 --- a/core/test/base/utils.cpp +++ b/core/test/base/utils.cpp @@ -410,13 +410,39 @@ TEST_F(TemporaryClone, DoesNotCopyToSameMemory) } +TEST_F(TemporaryClone, OutputDoesNotCopyToSameMemory) +{ + auto other = gko::ReferenceExecutor::create(); + auto clone = make_temporary_output_clone(other, gko::lend(obj)); + + ASSERT_NE(clone.get()->get_executor(), other); + ASSERT_EQ(obj->get_executor(), ref); +} + + TEST_F(TemporaryClone, CopiesBackAfterLeavingScope) { + obj->data = 4; { auto clone = make_temporary_clone(omp, gko::lend(obj)); clone.get()->data = 7; + + 
ASSERT_EQ(obj->data, 4); } + ASSERT_EQ(obj->get_executor(), ref); + ASSERT_EQ(obj->data, 7); +} + +TEST_F(TemporaryClone, OutputCopiesBackAfterLeavingScope) +{ + obj->data = 4; + { + auto clone = make_temporary_output_clone(omp, gko::lend(obj)); + clone.get()->data = 7; + + ASSERT_EQ(obj->data, 4); + } ASSERT_EQ(obj->get_executor(), ref); ASSERT_EQ(obj->data, 7); } diff --git a/core/test/matrix/identity.cpp b/core/test/matrix/identity.cpp index f2d9ad58084..cffe69d0bcc 100644 --- a/core/test/matrix/identity.cpp +++ b/core/test/matrix/identity.cpp @@ -96,36 +96,6 @@ TYPED_TEST(Identity, FailsConstructionWithRectangularSize) } -TYPED_TEST(Identity, AppliesToVector) -{ - using Id = typename TestFixture::Id; - using Vec = typename TestFixture::Vec; - auto identity = Id::create(this->exec, 3); - auto x = Vec::create(this->exec, gko::dim<2>{3, 1}); - auto b = gko::initialize({2.0, 1.0, 5.0}, this->exec); - - identity->apply(b.get(), x.get()); - - GKO_ASSERT_MTX_NEAR(x, l({2.0, 1.0, 5.0}), 0.0); -} - - -TYPED_TEST(Identity, AppliesToMultipleVectors) -{ - using Id = typename TestFixture::Id; - using Vec = typename TestFixture::Vec; - using T = typename TestFixture::value_type; - auto identity = Id::create(this->exec, 3); - auto x = Vec::create(this->exec, gko::dim<2>{3, 2}, 3); - auto b = gko::initialize( - 3, {I{2.0, 3.0}, I{1.0, 2.0}, I{5.0, -1.0}}, this->exec); - - identity->apply(b.get(), x.get()); - - GKO_ASSERT_MTX_NEAR(x, l({{2.0, 3.0}, {1.0, 2.0}, {5.0, -1.0}}), 0.0); -} - - template class IdentityFactory : public ::testing::Test { protected: diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index b455d16f63b..52caf38ddc0 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -117,6 +117,23 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void copy(std::shared_ptr exec, + const matrix::Dense *input, + matrix::Dense *output) +{ + const auto num_blocks = 
ceildiv(input->get_size()[0] * input->get_size()[1], + default_block_size); + kernel::strided_copy<<>>( + input->get_size()[0], input->get_size()[1], input->get_stride(), + output->get_stride(), as_cuda_type(input->get_const_values()), + as_cuda_type(output->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + template void fill(std::shared_ptr exec, matrix::Dense *mat, ValueType value) diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index e0986cff2de..23d394ad546 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -65,6 +65,7 @@ class Dense : public ::testing::Test { using NormVector = gko::matrix::Dense>; using Arr = gko::Array; using ComplexMtx = gko::matrix::Dense>; + using Diagonal = gko::matrix::Diagonal; using MixedComplexMtx = gko::matrix::Dense>>; @@ -190,6 +191,25 @@ class Dense : public ::testing::Test { }; +TEST_F(Dense, CudaCopyRespectsStride) +{ + set_up_vector_data(3); + auto stride = dx->get_size()[1] + 1; + auto result = Mtx::create(cuda, dx->get_size(), stride); + double val = 123456789.0; + auto original_data = result->get_values(); + auto padding_ptr = original_data + dx->get_size()[1]; + cuda->copy_from(ref.get(), 1, &val, padding_ptr); + + dx->convert_to(result.get()); + + GKO_ASSERT_MTX_NEAR(result, dx, 0); + ASSERT_EQ(result->get_stride(), stride); + ASSERT_EQ(cuda->copy_val_to_host(padding_ptr), val); + ASSERT_EQ(result->get_values(), original_data); +} + + TEST_F(Dense, CudaFillIsEquivalentToRef) { set_up_vector_data(3); @@ -199,7 +219,7 @@ TEST_F(Dense, CudaFillIsEquivalentToRef) dx->fill(42); result->copy_from(dx.get()); - GKO_ASSERT_MTX_NEAR(result, x, 1e-14); + GKO_ASSERT_MTX_NEAR(result, x, 0); } @@ -523,30 +543,6 @@ TEST_F(Dense, ComputeConjDotComplexIsEquivalentToRef) } -TEST_F(Dense, IsTransposable) -{ - set_up_apply_data(); - - auto trans = x->transpose(); - auto dtrans = dx->transpose(); - - 
GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); -} - - -TEST_F(Dense, IsConjugateTransposable) -{ - set_up_apply_data(); - - auto trans = c_x->conj_transpose(); - auto dtrans = dc_x->conj_transpose(); - - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); -} - - TEST_F(Dense, ConvertToCooIsEquivalentToRef) { set_up_apply_data(); @@ -730,6 +726,70 @@ TEST_F(Dense, CalculateTotalColsIsEquivalentToRef) } +TEST_F(Dense, IsTransposable) +{ + set_up_apply_data(); + + auto trans = x->transpose(); + auto dtrans = dx->transpose(); + + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); +} + + +TEST_F(Dense, IsTransposableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto row_span = gko::span{0, x->get_size()[0] - 2}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = Mtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = Mtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->transpose(trans.get()); + sub_dx->transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); +} + + +TEST_F(Dense, IsConjugateTransposable) +{ + set_up_apply_data(); + + auto trans = c_x->conj_transpose(); + auto dtrans = dc_x->conj_transpose(); + + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); +} + + +TEST_F(Dense, IsConjugateTransposableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto row_span = gko::span{0, c_x->get_size()[0] - 2}; + auto col_span = gko::span{0, c_x->get_size()[1] - 2}; + auto sub_x = c_x->create_submatrix(row_span, col_span); + auto sub_dx = dc_x->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = 
ComplexMtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = ComplexMtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->conj_transpose(trans.get()); + sub_dx->conj_transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); +} + + TEST_F(Dense, CanGatherRows) { set_up_apply_data(); @@ -769,6 +829,21 @@ TEST_F(Dense, IsPermutable) } +TEST_F(Dense, IsPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 2); + + square->permute(rpermute_idxs.get(), permuted.get()); + dsquare->permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInversePermutable) { set_up_apply_data(); @@ -781,6 +856,21 @@ TEST_F(Dense, IsInversePermutable) } +TEST_F(Dense, IsInversePermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 2); + + square->inverse_permute(rpermute_idxs.get(), permuted.get()); + dsquare->inverse_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); @@ -793,6 +883,20 @@ TEST_F(Dense, IsRowPermutable) } +TEST_F(Dense, IsRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->row_permute(rpermute_idxs.get(), permuted.get()); + dx->row_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsColPermutable) { 
set_up_apply_data(); @@ -805,6 +909,20 @@ TEST_F(Dense, IsColPermutable) } +TEST_F(Dense, IsColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->column_permute(cpermute_idxs.get(), permuted.get()); + dx->column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseRowPermutable) { set_up_apply_data(); @@ -817,6 +935,20 @@ TEST_F(Dense, IsInverseRowPermutable) } +TEST_F(Dense, IsInverseRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_row_permute(rpermute_idxs.get(), permuted.get()); + dx->inverse_row_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseColPermutable) { set_up_apply_data(); @@ -829,6 +961,20 @@ TEST_F(Dense, IsInverseColPermutable) } +TEST_F(Dense, IsInverseColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_column_permute(cpermute_idxs.get(), permuted.get()); + dx->inverse_column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) { set_up_apply_data(); @@ -840,6 +986,20 @@ TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnTallSkinnyIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, x->get_size()[1]); + // test 
make_temporary_clone + auto ddiag = Diagonal::create(ref, x->get_size()[1]); + + x->extract_diagonal(diag.get()); + dx->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) { set_up_apply_data(); @@ -851,6 +1011,20 @@ TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnShortFatIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, y->get_size()[0]); + // test make_temporary_clone + auto ddiag = Diagonal::create(ref, y->get_size()[0]); + + y->extract_diagonal(diag.get()); + dy->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, InplaceAbsoluteMatrixIsEquivalentToRef) { set_up_apply_data(); @@ -873,6 +1047,20 @@ TEST_F(Dense, OutplaceAbsoluteMatrixIsEquivalentToRef) } +TEST_F(Dense, OutplaceAbsoluteMatrixIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto abs_x = NormVector::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dabs_x = NormVector::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->compute_absolute(abs_x.get()); + dx->compute_absolute(dabs_x.get()); + + GKO_ASSERT_MTX_NEAR(abs_x, dabs_x, 1e-14); +} + + TEST_F(Dense, MakeComplexIsEquivalentToRef) { set_up_apply_data(); @@ -884,13 +1072,15 @@ TEST_F(Dense, MakeComplexIsEquivalentToRef) } -TEST_F(Dense, MakeComplexWithGivenResultIsEquivalentToRef) +TEST_F(Dense, MakeComplexIntoDenseCrossExecutor) { set_up_apply_data(); - auto complex_x = ComplexMtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dcomplex_x = + ComplexMtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->make_complex(complex_x.get()); - auto dcomplex_x = ComplexMtx::create(cuda, x->get_size()); dx->make_complex(dcomplex_x.get()); GKO_ASSERT_MTX_NEAR(complex_x, dcomplex_x, 0); @@ -908,13 +1098,14 @@ TEST_F(Dense, 
GetRealIsEquivalentToRef) } -TEST_F(Dense, GetRealWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetRealIntoDenseCrossExecutor) { set_up_apply_data(); - auto real_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dreal_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_real(real_x.get()); - auto dreal_x = Mtx::create(cuda, dx->get_size()); dx->get_real(dreal_x.get()); GKO_ASSERT_MTX_NEAR(real_x, dreal_x, 0); @@ -932,13 +1123,14 @@ TEST_F(Dense, GetImagIsEquivalentToRef) } -TEST_F(Dense, GetImagWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetImagIntoDenseCrossExecutor) { set_up_apply_data(); - auto imag_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dimag_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_imag(imag_x.get()); - auto dimag_x = Mtx::create(cuda, dx->get_size()); dx->get_imag(dimag_x.get()); GKO_ASSERT_MTX_NEAR(imag_x, dimag_x, 0); diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 42f4a960861..48dcefbce00 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -84,6 +84,15 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void copy(std::shared_ptr exec, + const matrix::Dense *input, + matrix::Dense *output) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + template void fill(std::shared_ptr exec, matrix::Dense *mat, ValueType value) GKO_NOT_IMPLEMENTED; diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 9b7a2f60a6a..257b2b5961d 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -120,6 +120,24 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void copy(std::shared_ptr exec, + const 
matrix::Dense *input, + matrix::Dense *output) +{ + const auto num_blocks = ceildiv(input->get_size()[0] * input->get_size()[1], + default_block_size); + hipLaunchKernelGGL(kernel::strided_copy, num_blocks, default_block_size, 0, + 0, input->get_size()[0], input->get_size()[1], + input->get_stride(), output->get_stride(), + as_hip_type(input->get_const_values()), + as_hip_type(output->get_values())); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + template void fill(std::shared_ptr exec, matrix::Dense *mat, ValueType value) diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 72e69c7cc40..18c5a6dc0b5 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -65,6 +65,7 @@ class Dense : public ::testing::Test { using NormVector = gko::matrix::Dense>; using Arr = gko::Array; using ComplexMtx = gko::matrix::Dense>; + using Diagonal = gko::matrix::Diagonal; using MixedComplexMtx = gko::matrix::Dense>>; @@ -185,6 +186,25 @@ class Dense : public ::testing::Test { }; +TEST_F(Dense, HipCopyRespectsStride) +{ + set_up_vector_data(3); + auto stride = dx->get_size()[1] + 1; + auto result = Mtx::create(hip, dx->get_size(), stride); + double val = 123456789.0; + auto original_data = result->get_values(); + auto padding_ptr = original_data + dx->get_size()[1]; + hip->copy_from(ref.get(), 1, &val, padding_ptr); + + dx->convert_to(result.get()); + + GKO_ASSERT_MTX_NEAR(result, dx, 0); + ASSERT_EQ(result->get_stride(), stride); + ASSERT_EQ(hip->copy_val_to_host(padding_ptr), val); + ASSERT_EQ(result->get_values(), original_data); +} + + TEST_F(Dense, HipFillIsEquivalentToRef) { set_up_vector_data(3); @@ -518,18 +538,6 @@ TEST_F(Dense, ComputeConjDotComplexIsEquivalentToRef) } -TEST_F(Dense, IsTransposable) -{ - set_up_apply_data(); - - auto trans = x->transpose(); - auto dtrans = dx->transpose(); - - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - 
static_cast(trans.get()), 0); -} - - TEST_F(Dense, ConvertToCooIsEquivalentToRef) { set_up_apply_data(); @@ -713,6 +721,38 @@ TEST_F(Dense, CalculateTotalColsIsEquivalentToRef) } +TEST_F(Dense, IsTransposable) +{ + set_up_apply_data(); + + auto trans = x->transpose(); + auto dtrans = dx->transpose(); + + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); +} + + +TEST_F(Dense, IsTransposableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto row_span = gko::span{0, x->get_size()[0] - 2}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = Mtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = Mtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->transpose(trans.get()); + sub_dx->transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); +} + + TEST_F(Dense, CanGatherRows) { set_up_apply_data(); @@ -752,6 +792,21 @@ TEST_F(Dense, IsPermutable) } +TEST_F(Dense, IsPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 2); + + square->permute(rpermute_idxs.get(), permuted.get()); + dsquare->permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInversePermutable) { set_up_apply_data(); @@ -764,6 +819,21 @@ TEST_F(Dense, IsInversePermutable) } +TEST_F(Dense, IsInversePermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 
2); + + square->inverse_permute(rpermute_idxs.get(), permuted.get()); + dsquare->inverse_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); @@ -776,6 +846,20 @@ TEST_F(Dense, IsRowPermutable) } +TEST_F(Dense, IsRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->row_permute(rpermute_idxs.get(), permuted.get()); + dx->row_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsColPermutable) { set_up_apply_data(); @@ -788,6 +872,20 @@ TEST_F(Dense, IsColPermutable) } +TEST_F(Dense, IsColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->column_permute(cpermute_idxs.get(), permuted.get()); + dx->column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseRowPermutable) { set_up_apply_data(); @@ -800,6 +898,20 @@ TEST_F(Dense, IsInverseRowPermutable) } +TEST_F(Dense, IsInverseRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_row_permute(rpermute_idxs.get(), permuted.get()); + dx->inverse_row_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseColPermutable) { set_up_apply_data(); @@ -812,6 +924,20 @@ TEST_F(Dense, IsInverseColPermutable) } +TEST_F(Dense, 
IsInverseColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_column_permute(cpermute_idxs.get(), permuted.get()); + dx->inverse_column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) { set_up_apply_data(); @@ -823,6 +949,20 @@ TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnTallSkinnyIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, x->get_size()[1]); + // test make_temporary_clone + auto ddiag = Diagonal::create(ref, x->get_size()[1]); + + x->extract_diagonal(diag.get()); + dx->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) { set_up_apply_data(); @@ -834,6 +974,20 @@ TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnShortFatIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, y->get_size()[0]); + // test make_temporary_clone + auto ddiag = Diagonal::create(ref, y->get_size()[0]); + + y->extract_diagonal(diag.get()); + dy->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, InplaceAbsoluteMatrixIsEquivalentToRef) { set_up_apply_data(); @@ -856,6 +1010,20 @@ TEST_F(Dense, OutplaceAbsoluteMatrixIsEquivalentToRef) } +TEST_F(Dense, OutplaceAbsoluteMatrixIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto abs_x = NormVector::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dabs_x = NormVector::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->compute_absolute(abs_x.get()); + 
dx->compute_absolute(dabs_x.get()); + + GKO_ASSERT_MTX_NEAR(abs_x, dabs_x, 1e-14); +} + + TEST_F(Dense, MakeComplexIsEquivalentToRef) { set_up_apply_data(); @@ -867,13 +1035,15 @@ TEST_F(Dense, MakeComplexIsEquivalentToRef) } -TEST_F(Dense, MakeComplexWithGivenResultIsEquivalentToRef) +TEST_F(Dense, MakeComplexIntoDenseCrossExecutor) { set_up_apply_data(); - auto complex_x = ComplexMtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dcomplex_x = + ComplexMtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->make_complex(complex_x.get()); - auto dcomplex_x = ComplexMtx::create(hip, x->get_size()); dx->make_complex(dcomplex_x.get()); GKO_ASSERT_MTX_NEAR(complex_x, dcomplex_x, 0); @@ -891,13 +1061,14 @@ TEST_F(Dense, GetRealIsEquivalentToRef) } -TEST_F(Dense, GetRealWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetRealIntoDenseCrossExecutor) { set_up_apply_data(); - auto real_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dreal_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_real(real_x.get()); - auto dreal_x = Mtx::create(hip, dx->get_size()); dx->get_real(dreal_x.get()); GKO_ASSERT_MTX_NEAR(real_x, dreal_x, 0); @@ -915,13 +1086,14 @@ TEST_F(Dense, GetImagIsEquivalentToRef) } -TEST_F(Dense, GetImagWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetImagIntoDenseCrossExecutor) { set_up_apply_data(); - auto imag_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dimag_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_imag(imag_x.get()); - auto dimag_x = Mtx::create(hip, dx->get_size()); dx->get_imag(dimag_x.get()); GKO_ASSERT_MTX_NEAR(imag_x, dimag_x, 0); diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index c9f4aaa57c4..c4aa2323417 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -576,16 +576,21 @@ 
namespace detail { template struct temporary_clone_helper> { static std::unique_ptr> create( - std::shared_ptr exec, Array *ptr) + std::shared_ptr exec, Array *ptr, bool copy_data) { - return std::make_unique>(std::move(exec), *ptr); + if (copy_data) { + return std::make_unique>(std::move(exec), *ptr); + } else { + return std::make_unique>(std::move(exec), + ptr->get_num_elems()); + } } }; template struct temporary_clone_helper> { static std::unique_ptr> create( - std::shared_ptr exec, const Array *ptr) + std::shared_ptr exec, const Array *ptr, bool) { return std::make_unique>(std::move(exec), *ptr); } diff --git a/include/ginkgo/core/base/temporary_clone.hpp b/include/ginkgo/core/base/temporary_clone.hpp index c3309d8736a..589a0ca9114 100644 --- a/include/ginkgo/core/base/temporary_clone.hpp +++ b/include/ginkgo/core/base/temporary_clone.hpp @@ -108,7 +108,7 @@ class copy_back_deleter { template struct temporary_clone_helper { static std::unique_ptr create(std::shared_ptr exec, - T *ptr) + T *ptr, bool) { return gko::clone(std::move(exec), ptr); } @@ -137,8 +137,11 @@ class temporary_clone { * * @param exec the executor where the clone will be created * @param ptr a pointer to the object of which the clone will be created + * @param copy_data should the data be copied to the executor, or should + * only the result be copied back afterwards? 
*/ - explicit temporary_clone(std::shared_ptr exec, pointer ptr) + explicit temporary_clone(std::shared_ptr exec, pointer ptr, + bool copy_data = true) { if (ptr->get_executor()->memory_accessible(exec)) { // just use the object we already have @@ -146,10 +149,10 @@ class temporary_clone { } else { // clone the object to the new executor and make sure it's copied // back before we delete it - handle_ = handle_type( - temporary_clone_helper::create(std::move(exec), ptr) - .release(), - copy_back_deleter(ptr)); + handle_ = handle_type(temporary_clone_helper::create( + std::move(exec), ptr, copy_data) + .release(), + copy_back_deleter(ptr)); } } @@ -196,6 +199,29 @@ detail::temporary_clone make_temporary_clone( } +/** + * Creates an uninitialized temporary_clone that will be copied back to the + * input afterwards. It can be used for output parameters to avoid an + * unnecessary copy in make_temporary_clone. + * + * This is a helper function which avoids the need to explicitly specify the + * type of the object, as would be the case if using the constructor of + * temporary_clone. + * + * @param exec the executor where the uninitialized clone will be created + * @param ptr a pointer to the object of which the clone will be created + */ +template +detail::temporary_clone make_temporary_output_clone( + std::shared_ptr exec, T *ptr) +{ + static_assert( + !std::is_const::value, + "make_temporary_output_clone should only be used on non-const objects"); + return detail::temporary_clone(std::move(exec), ptr, false); +} + + } // namespace gko diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index a15d8e24f2f..dc14b8ea83b 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -527,15 +527,6 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, #endif -/** - * Instantiates a template for each value type conversion pair compiled by - * Ginkgo.
- * - * @param _macro A macro which expands the template instantiation - * (not including the leading `template` specifier). - * Should take two arguments `src` and `dst`, which - * are replaced by the source and destination value type. - */ #if GINKGO_DPCPP_SINGLE_MODE #define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro) \ template <> \ @@ -546,12 +537,50 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, _macro(std::complex, std::complex) GKO_NOT_IMPLEMENTED; \ template <> \ _macro(std::complex, std::complex) GKO_NOT_IMPLEMENTED + + +#define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY(_macro) \ + GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro); \ + template _macro(float, float); \ + template <> \ + _macro(double, double) GKO_NOT_IMPLEMENTED; \ + template _macro(std::complex, std::complex); \ + template <> \ + _macro(std::complex, std::complex) GKO_NOT_IMPLEMENTED #else + + +/** + * Instantiates a template for each value type conversion pair compiled by + * Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take two arguments `src` and `dst`, which + * are replaced by the source and destination value type. + */ #define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro) \ template _macro(float, double); \ template _macro(double, float); \ template _macro(std::complex, std::complex); \ template _macro(std::complex, std::complex) + + +/** + * Instantiates a template for each value type conversion or copy pair compiled + * by Ginkgo. + * + * @param _macro A macro which expands the template instantiation + * (not including the leading `template` specifier). + * Should take two arguments `src` and `dst`, which + * are replaced by the source and destination value type. 
+ */ +#define GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY(_macro) \ + GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(_macro); \ + template _macro(float, float); \ + template _macro(double, double); \ + template _macro(std::complex, std::complex); \ + template _macro(std::complex, std::complex) #endif diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 83ada5e4dc1..331be1aa28e 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -143,6 +143,7 @@ class Dense using mat_data = gko::matrix_data; using mat_data32 = gko::matrix_data; using absolute_type = remove_complex; + using real_type = absolute_type; using complex_type = to_complex; using row_major_range = gko::range>; @@ -198,6 +199,10 @@ class Dense friend class Dense>; + void convert_to(Dense *result) const override; + + void move_to(Dense *result) override; + void convert_to(Dense> *result) const override; void move_to(Dense> *result) override; @@ -262,18 +267,76 @@ class Dense std::unique_ptr conj_transpose() const override; + /** + * Writes the transposed matrix into the given output matrix. + * + * @param output The output matrix. It must have the dimensions + * `gko::transpose(this->get_size())` + */ + void transpose(Dense *output) const; + + /** + * Writes the conjugate-transposed matrix into the given output matrix. + * + * @param output The output matrix. It must have the dimensions + * `gko::transpose(this->get_size())` + */ + void conj_transpose(Dense *output) const; + + /** + * Fill the dense matrix with a given value. + * + * @param value the value to be filled + */ + void fill(const ValueType value); + std::unique_ptr permute( const Array *permutation_indices) const override; std::unique_ptr permute( const Array *permutation_indices) const override; + /** + * Writes the symmetrically permuted matrix into the given output matrix. + * + * @param permutation_indices The array containing permutation indices. 
+ * It must have `this->get_size()[0]` elements. + * @param output The output matrix. It must have the dimensions + * `this->get_size()` + * @see Dense::permute(const Array*) + */ + void permute(const Array *permutation_indices, Dense *output) const; + + /** + * @copydoc Dense::permute(const Array*, Dense*) + */ + void permute(const Array *permutation_indices, Dense *output) const; + std::unique_ptr inverse_permute( const Array *permutation_indices) const override; std::unique_ptr inverse_permute( const Array *permutation_indices) const override; + /** + * Writes the inverse symmetrically permuted matrix into the given output + * matrix. + * + * @param permutation_indices The array containing permutation indices. + * It must have `this->get_size()[0]` elements. + * @param output The output matrix. It must have the dimensions + * `this->get_size()` + * @see Dense::inverse_permute(const Array*) + */ + void inverse_permute(const Array *permutation_indices, + Dense *output) const; + + /** + * @copydoc Dense::inverse_permute(const Array*, Dense*) + */ + void inverse_permute(const Array *permutation_indices, + Dense *output) const; + std::unique_ptr row_permute( const Array *permutation_indices) const override; @@ -281,11 +344,22 @@ class Dense const Array *permutation_indices) const override; /** - * Fill the dense matrix with a given value. + * Writes the row-permuted matrix into the given output matrix. * - * @param value the value to be filled + * @param permutation_indices The array containing permutation indices. + * It must have `this->get_size()[0]` elements. + * @param output The output matrix. 
It must have the dimensions + * `this->get_size()` + * @see Dense::row_permute(const Array*) */ - void fill(const ValueType value); + void row_permute(const Array *permutation_indices, + Dense *output) const; + + /** + * @copydoc Dense::row_permute(const Array*, Dense*) + */ + void row_permute(const Array *permutation_indices, + Dense *output) const; /** * Create a Dense matrix consisting of the given rows from this matrix. @@ -330,22 +404,94 @@ class Dense std::unique_ptr column_permute( const Array *permutation_indices) const override; + /** + * Writes the column-permuted matrix into the given output matrix. + * + * @param permutation_indices The array containing permutation indices. + * It must have `this->get_size()[1]` elements. + * @param output The output matrix. It must have the dimensions + * `this->get_size()` + * @see Dense::column_permute(const Array*) + */ + void column_permute(const Array *permutation_indices, + Dense *output) const; + + /** + * @copydoc Dense::column_permute(const Array*, Dense*) + */ + void column_permute(const Array *permutation_indices, + Dense *output) const; + std::unique_ptr inverse_row_permute( const Array *permutation_indices) const override; std::unique_ptr inverse_row_permute( const Array *permutation_indices) const override; + /** + * Writes the inverse row-permuted matrix into the given output matrix. + * + * @param permutation_indices The array containing permutation indices. + * It must have `this->get_size()[0]` elements. + * @param output The output matrix. 
It must have the dimensions + * `this->get_size()` + * @see Dense::inverse_row_permute(const Array*) + */ + void inverse_row_permute(const Array *permutation_indices, + Dense *output) const; + + /** + * @copydoc Dense::inverse_row_permute(const Array*, Dense*) + */ + void inverse_row_permute(const Array *permutation_indices, + Dense *output) const; + std::unique_ptr inverse_column_permute( const Array *permutation_indices) const override; std::unique_ptr inverse_column_permute( const Array *permutation_indices) const override; + /** + * Writes the inverse column-permuted matrix into the given output matrix. + * + * @param permutation_indices The array containing permutation indices. + * It must have `this->get_size()[1]` elements. + * @param output The output matrix. It must have the dimensions + * `this->get_size()` + * @see Dense::inverse_column_permute(const Array*) + */ + void inverse_column_permute(const Array *permutation_indices, + Dense *output) const; + + /** + * @copydoc Dense::inverse_column_permute(const Array*, Dense*) + */ + void inverse_column_permute(const Array *permutation_indices, + Dense *output) const; + std::unique_ptr> extract_diagonal() const override; + /** + * Writes the diagonal of this matrix into an existing diagonal matrix. + * + * @param output The output matrix. Its size must match the size of this + * matrix's diagonal. + * @see Dense::extract_diagonal() + */ + void extract_diagonal(Diagonal *output) const; + std::unique_ptr compute_absolute() const override; + /** + * Writes the absolute values of this matrix into an existing matrix. + * + * @param output The output matrix. Its size must match the size of this + * matrix. + * @see Dense::compute_absolute() + */ + void compute_absolute(absolute_type *output) const; + void compute_absolute_inplace() override; /** @@ -359,30 +505,30 @@ class Dense * If the original matrix was real, the imaginary part of the result will * be zero. 
*/ - void make_complex(Dense> *result) const; + void make_complex(complex_type *result) const; /** * Creates a new real matrix and extracts the real part of the original * matrix into that. */ - std::unique_ptr get_real() const; + std::unique_ptr get_real() const; /** * Extracts the real part of the original matrix into a given real matrix. */ - void get_real(Dense> *result) const; + void get_real(real_type *result) const; /** * Creates a new real matrix and extracts the imaginary part of the * original matrix into that. */ - std::unique_ptr get_imag() const; + std::unique_ptr get_imag() const; /** * Extracts the imaginary part of the original matrix into a given real * matrix. */ - void get_imag(Dense> *result) const; + void get_imag(real_type *result) const; /** * Returns a pointer to the array of values of the matrix. @@ -514,7 +660,7 @@ class Dense { auto exec = this->get_executor(); this->compute_dot_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, result).get()); + make_temporary_output_clone(exec, result).get()); } /** @@ -528,8 +674,9 @@ class Dense void compute_conj_dot(const LinOp *b, LinOp *result) const { auto exec = this->get_executor(); - this->compute_conj_dot_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, result).get()); + this->compute_conj_dot_impl( + make_temporary_clone(exec, b).get(), + make_temporary_output_clone(exec, result).get()); } /** @@ -542,7 +689,8 @@ class Dense void compute_norm2(LinOp *result) const { auto exec = this->get_executor(); - this->compute_norm2_impl(make_temporary_clone(exec, result).get()); + this->compute_norm2_impl( + make_temporary_output_clone(exec, result).get()); } /** @@ -672,8 +820,10 @@ class Dense values_{exec, std::forward(values)}, stride_{stride} { - GKO_ENSURE_IN_BOUNDS((size[0] - 1) * stride + size[1] - 1, - values_.get_num_elems()); + if (size[0] > 0 && size[1] > 0) { + GKO_ENSURE_IN_BOUNDS((size[0] - 1) * stride + size[1] - 1, + values_.get_num_elems()); + 
} } /** @@ -783,6 +933,33 @@ class Dense idx % this->get_size()[1]); } + template + void permute_impl(const Array *permutation, Dense *output) const; + + template + void inverse_permute_impl(const Array *permutation, + Dense *output) const; + + template + void row_permute_impl(const Array *permutation, + Dense *output) const; + + template + void inverse_row_permute_impl(const Array *permutation, + Dense *output) const; + + template + void row_gather_impl(const Array *row_indices, + Dense *output) const; + + template + void column_permute_impl(const Array *permutation, + Dense *output) const; + + template + void inverse_column_permute_impl(const Array *permutation, + Dense *output) const; + private: Array values_; size_type stride_; @@ -792,6 +969,27 @@ class Dense } // namespace matrix +namespace detail { + + +template +struct temporary_clone_helper> { + static std::unique_ptr> create( + std::shared_ptr exec, matrix::Dense *ptr, + bool copy_data) + { + if (copy_data) { + return gko::clone(std::move(exec), ptr); + } else { + return matrix::Dense::create(exec, ptr->get_size()); + } + } +}; + + +} // namespace detail + + /** * Creates and initializes a column-vector. 
* diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 65938de584f..0534f1deb41 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -127,6 +127,24 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void copy(std::shared_ptr exec, + const matrix::Dense *input, + matrix::Dense *output) +{ +#pragma omp parallel for + for (size_type row = 0; row < input->get_size()[0]; ++row) { + for (size_type col = 0; col < input->get_size()[1]; ++col) { + output->at(row, col) = + static_cast(input->at(row, col)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + template void fill(std::shared_ptr exec, matrix::Dense *mat, ValueType value) diff --git a/omp/test/matrix/dense_kernels.cpp b/omp/test/matrix/dense_kernels.cpp index b59c43ed4b0..8ade4b939af 100644 --- a/omp/test/matrix/dense_kernels.cpp +++ b/omp/test/matrix/dense_kernels.cpp @@ -70,6 +70,7 @@ class Dense : public ::testing::Test { using NormVector = gko::matrix::Dense>; using Arr = gko::Array; using ComplexMtx = gko::matrix::Dense>; + using Diagonal = gko::matrix::Diagonal; using MixedComplexMtx = gko::matrix::Dense>>; @@ -204,6 +205,25 @@ class Dense : public ::testing::Test { }; +TEST_F(Dense, OmpCopyRespectsStride) +{ + set_up_vector_data(3); + auto stride = dx->get_size()[1] + 1; + auto result = Mtx::create(omp, dx->get_size(), stride); + double val = 123456789.0; + auto original_data = result->get_values(); + auto padding_ptr = original_data + dx->get_size()[1]; + omp->copy_from(ref.get(), 1, &val, padding_ptr); + + dx->convert_to(result.get()); + + GKO_ASSERT_MTX_NEAR(result, dx, 0); + ASSERT_EQ(result->get_stride(), stride); + ASSERT_EQ(omp->copy_val_to_host(padding_ptr), val); + ASSERT_EQ(result->get_values(), original_data); +} + + TEST_F(Dense, OmpFillIsEquivalentToRef) { set_up_vector_data(3); @@ -793,6 +813,23 @@ TEST_F(Dense, 
ConvertsEmptyToSellp) } +TEST_F(Dense, CalculateNNZPerRowIsEquivalentToRef) +{ + set_up_apply_data(); + gko::Array nnz_per_row(ref); + nnz_per_row.resize_and_reset(x->get_size()[0]); + gko::Array dnnz_per_row(omp); + dnnz_per_row.resize_and_reset(dx->get_size()[0]); + + gko::kernels::reference::dense::calculate_nonzeros_per_row(ref, x.get(), + &nnz_per_row); + gko::kernels::omp::dense::calculate_nonzeros_per_row(omp, dx.get(), + &dnnz_per_row); + + GKO_ASSERT_ARRAY_EQ(nnz_per_row, dnnz_per_row); +} + + TEST_F(Dense, CalculateMaxNNZPerRowIsEquivalentToRef) { std::size_t ref_max_nnz_per_row = 0; @@ -839,6 +876,26 @@ TEST_F(Dense, IsTransposable) } +TEST_F(Dense, IsTransposableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto row_span = gko::span{0, x->get_size()[0] - 2}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = Mtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = Mtx::create(ref, gko::transpose(sub_x->get_size()), + sub_x->get_size()[0] + 4); + + sub_x->transpose(trans.get()); + sub_dx->transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); +} + + TEST_F(Dense, IsConjugateTransposable) { set_up_apply_data(); @@ -851,6 +908,26 @@ TEST_F(Dense, IsConjugateTransposable) } +TEST_F(Dense, IsConjugateTransposableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto row_span = gko::span{0, c_x->get_size()[0] - 2}; + auto col_span = gko::span{0, c_x->get_size()[1] - 2}; + auto sub_x = c_x->create_submatrix(row_span, col_span); + auto sub_dx = dc_x->create_submatrix(row_span, col_span); + // create the target matrices on another executor to + // force temporary clone + auto trans = ComplexMtx::create(ref, gko::transpose(sub_x->get_size())); + auto dtrans = ComplexMtx::create(ref, gko::transpose(sub_x->get_size()), + 
sub_x->get_size()[0] + 4); + + sub_x->conj_transpose(trans.get()); + sub_dx->conj_transpose(dtrans.get()); + + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0); +} + + TEST_F(Dense, CanGatherRows) { set_up_apply_data(); @@ -862,17 +939,21 @@ TEST_F(Dense, CanGatherRows) } -TEST_F(Dense, CanGatherRowsIntoDense) +TEST_F(Dense, CanGatherRowsIntoDenseCrossExecutor) { set_up_apply_data(); + auto row_span = gko::span{0, x->get_size()[0]}; + auto col_span = gko::span{0, x->get_size()[1] - 2}; + auto sub_x = x->create_submatrix(row_span, col_span); + auto sub_dx = dx->create_submatrix(row_span, col_span); auto gather_size = - gko::dim<2>{rgather_idxs->get_num_elems(), x->get_size()[1]}; + gko::dim<2>{rgather_idxs->get_num_elems(), sub_x->get_size()[1]}; auto r_gather = Mtx::create(ref, gather_size); // test make_temporary_clone and non-default stride - auto dr_gather = Mtx::create(ref, gather_size, x->get_size()[1] + 2); + auto dr_gather = Mtx::create(ref, gather_size, sub_x->get_size()[1] + 2); - x->row_gather(rgather_idxs.get(), r_gather.get()); - dx->row_gather(rgather_idxs.get(), dr_gather.get()); + sub_x->row_gather(rgather_idxs.get(), r_gather.get()); + sub_dx->row_gather(rgather_idxs.get(), dr_gather.get()); GKO_ASSERT_MTX_NEAR(r_gather.get(), dr_gather.get(), 0); } @@ -890,6 +971,21 @@ TEST_F(Dense, IsPermutable) } +TEST_F(Dense, IsPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 2); + + square->permute(rpermute_idxs.get(), permuted.get()); + dsquare->permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInversePermutable) { set_up_apply_data(); @@ -902,6 +998,21 @@ TEST_F(Dense, IsInversePermutable) } +TEST_F(Dense, IsInversePermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = 
Mtx::create(ref, square->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = + Mtx::create(ref, square->get_size(), square->get_size()[1] + 2); + + square->inverse_permute(rpermute_idxs.get(), permuted.get()); + dsquare->inverse_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsRowPermutable) { set_up_apply_data(); @@ -914,6 +1025,20 @@ TEST_F(Dense, IsRowPermutable) } +TEST_F(Dense, IsRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->row_permute(rpermute_idxs.get(), permuted.get()); + dx->row_permute(rpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsColPermutable) { set_up_apply_data(); @@ -926,6 +1051,20 @@ TEST_F(Dense, IsColPermutable) } +TEST_F(Dense, IsColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->column_permute(cpermute_idxs.get(), permuted.get()); + dx->column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseRowPermutable) { set_up_apply_data(); @@ -938,6 +1077,20 @@ TEST_F(Dense, IsInverseRowPermutable) } +TEST_F(Dense, IsInverseRowPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_row_permute(rpermute_idxs.get(), permuted.get()); + dx->inverse_row_permute(rpermute_idxs.get(), dpermuted.get()); + + 
GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, IsInverseColPermutable) { set_up_apply_data(); @@ -950,6 +1103,20 @@ TEST_F(Dense, IsInverseColPermutable) } +TEST_F(Dense, IsInverseColPermutableIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto permuted = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dpermuted = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->inverse_column_permute(cpermute_idxs.get(), permuted.get()); + dx->inverse_column_permute(cpermute_idxs.get(), dpermuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, dpermuted, 0); +} + + TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) { set_up_apply_data(); @@ -961,6 +1128,20 @@ TEST_F(Dense, ExtractDiagonalOnTallSkinnyIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnTallSkinnyIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, x->get_size()[1]); + // test make_temporary_clone + auto ddiag = Diagonal::create(ref, x->get_size()[1]); + + x->extract_diagonal(diag.get()); + dx->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) { set_up_apply_data(); @@ -972,6 +1153,20 @@ TEST_F(Dense, ExtractDiagonalOnShortFatIsEquivalentToRef) } +TEST_F(Dense, ExtractDiagonalOnShortFatIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto diag = Diagonal::create(ref, y->get_size()[0]); + // test make_temporary_clone + auto ddiag = Diagonal::create(ref, y->get_size()[0]); + + y->extract_diagonal(diag.get()); + dy->extract_diagonal(ddiag.get()); + + GKO_ASSERT_MTX_NEAR(diag.get(), ddiag.get(), 0); +} + + TEST_F(Dense, InplaceAbsoluteMatrixIsEquivalentToRef) { set_up_apply_data(); @@ -994,6 +1189,20 @@ TEST_F(Dense, OutplaceAbsoluteMatrixIsEquivalentToRef) } +TEST_F(Dense, OutplaceAbsoluteMatrixIntoDenseCrossExecutor) +{ + set_up_apply_data(); + auto abs_x = NormVector::create(ref, x->get_size()); + // 
test make_temporary_clone and non-default stride + auto dabs_x = NormVector::create(ref, x->get_size(), x->get_size()[1] + 2); + + x->compute_absolute(abs_x.get()); + dx->compute_absolute(dabs_x.get()); + + GKO_ASSERT_MTX_NEAR(abs_x, dabs_x, 1e-14); +} + + TEST_F(Dense, MakeComplexIsEquivalentToRef) { set_up_apply_data(); @@ -1005,13 +1214,15 @@ TEST_F(Dense, MakeComplexIsEquivalentToRef) } -TEST_F(Dense, MakeComplexWithGivenResultIsEquivalentToRef) +TEST_F(Dense, MakeComplexIntoDenseCrossExecutor) { set_up_apply_data(); - auto complex_x = ComplexMtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dcomplex_x = + ComplexMtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->make_complex(complex_x.get()); - auto dcomplex_x = ComplexMtx::create(omp, x->get_size()); dx->make_complex(dcomplex_x.get()); GKO_ASSERT_MTX_NEAR(complex_x, dcomplex_x, 0); @@ -1029,13 +1240,14 @@ TEST_F(Dense, GetRealIsEquivalentToRef) } -TEST_F(Dense, GetRealWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetRealIntoDenseCrossExecutor) { set_up_apply_data(); - auto real_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dreal_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_real(real_x.get()); - auto dreal_x = Mtx::create(omp, dx->get_size()); dx->get_real(dreal_x.get()); GKO_ASSERT_MTX_NEAR(real_x, dreal_x, 0); @@ -1053,13 +1265,14 @@ TEST_F(Dense, GetImagIsEquivalentToRef) } -TEST_F(Dense, GetImagWithGivenResultIsEquivalentToRef) +TEST_F(Dense, GetImagIntoDenseCrossExecutor) { set_up_apply_data(); - auto imag_x = Mtx::create(ref, x->get_size()); + // test make_temporary_clone and non-default stride + auto dimag_x = Mtx::create(ref, x->get_size(), x->get_size()[1] + 2); + x->get_imag(imag_x.get()); - auto dimag_x = Mtx::create(omp, dx->get_size()); dx->get_imag(dimag_x.get()); GKO_ASSERT_MTX_NEAR(imag_x, dimag_x, 0); diff --git a/reference/matrix/dense_kernels.cpp 
b/reference/matrix/dense_kernels.cpp index fa55253a4ac..49721aab418 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -116,6 +116,23 @@ void apply(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); +template +void copy(std::shared_ptr exec, + const matrix::Dense *input, + matrix::Dense *output) +{ + for (size_type row = 0; row < input->get_size()[0]; ++row) { + for (size_type col = 0; col < input->get_size()[1]; ++col) { + output->at(row, col) = + static_cast(input->at(row, col)); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( + GKO_DECLARE_DENSE_COPY_KERNEL); + + template void fill(std::shared_ptr exec, matrix::Dense *mat, ValueType value) diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 4b6a6e73b26..47703ff8349 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -115,6 +115,47 @@ class Dense : public ::testing::Test { TYPED_TEST_SUITE(Dense, gko::test::ValueTypes); +TYPED_TEST(Dense, CopyRespectsStride) +{ + using value_type = typename TestFixture::value_type; + auto m = + gko::initialize>({1.0, 2.0}, this->exec); + auto m2 = + gko::matrix::Dense::create(this->exec, gko::dim<2>{2, 1}, 2); + auto original_data = m2->get_values(); + original_data[1] = TypeParam{3.0}; + + m->convert_to(m2.get()); + + EXPECT_EQ(m2->at(0, 0), value_type{1.0}); + EXPECT_EQ(m2->get_stride(), 2); + EXPECT_EQ(m2->at(1, 0), value_type{2.0}); + EXPECT_EQ(m2->get_values(), original_data); + EXPECT_EQ(original_data[1], TypeParam{3.0}); +} + + +TYPED_TEST(Dense, TemporaryOutputCloneWorks) +{ + using value_type = typename TestFixture::value_type; + auto other = gko::OmpExecutor::create(); + auto m = gko::initialize>({1.0, 2.0}, other); + + { + auto clone = gko::make_temporary_output_clone(this->exec, m.get()); + clone->at(0) = 4.0; + clone->at(1) = 5.0; + + ASSERT_EQ(m->at(0), value_type{1.0}); + 
ASSERT_EQ(m->at(1), value_type{2.0}); + ASSERT_EQ(clone->get_size(), m->get_size()); + ASSERT_EQ(clone->get_executor(), this->exec); + } + ASSERT_EQ(m->at(0), value_type{4.0}); + ASSERT_EQ(m->at(1), value_type{5.0}); +} + + TYPED_TEST(Dense, CanBeFilledWithValue) { using value_type = typename TestFixture::value_type; @@ -1686,7 +1727,7 @@ TYPED_TEST(Dense, ConvertsToAndFromSellpWithMoreThanOneSlice) x->convert_to(sellp_mtx.get()); sellp_mtx->convert_to(dense_mtx.get()); - GKO_ASSERT_MTX_NEAR(dense_mtx.get(), x.get(), r::value); + GKO_ASSERT_MTX_NEAR(dense_mtx.get(), x.get(), 0.0); } @@ -1909,24 +1950,93 @@ TYPED_TEST(Dense, MovesEmptyToSellp) TYPED_TEST(Dense, SquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; - auto trans = this->mtx5->transpose(); - auto trans_as_dense = static_cast(trans.get()); + using T = typename TestFixture::value_type; + auto trans = gko::as(this->mtx5->transpose()); + + GKO_ASSERT_MTX_NEAR( + trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, this->mtx5->get_size()); + + this->mtx5->transpose(trans.get()); GKO_ASSERT_MTX_NEAR( - trans_as_dense, - l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), - r::value); + trans, l({{1.0, -2.0, 2.1}, {-1.0, 2.0, 3.4}, {-0.5, 4.5, 1.2}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsTransposableIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2})->transpose(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, -2.0}, {-1.0, 2.0}}), 0.0); + ASSERT_EQ(trans->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + 
+ ASSERT_THROW(this->mtx5->transpose(Mtx::create(this->exec).get()), + gko::DimensionMismatch); } TYPED_TEST(Dense, NonSquareMatrixIsTransposable) { using Mtx = typename TestFixture::Mtx; - auto trans = this->mtx4->transpose(); - auto trans_as_dense = static_cast(trans.get()); + using T = typename TestFixture::value_type; + auto trans = gko::as(this->mtx4->transpose()); + + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = + Mtx::create(this->exec, gko::transpose(this->mtx4->get_size())); + + this->mtx4->transpose(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareSubmatrixIsTransposableIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto trans = Mtx::create(this->exec, gko::dim<2>{2, 1}, 5); + + this->mtx4->create_submatrix({0, 1}, {0, 2})->transpose(trans.get()); + + GKO_ASSERT_MTX_NEAR(trans, l({1.0, 3.0}), 0.0); + ASSERT_EQ(trans->get_stride(), 5); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsTransposableIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; - GKO_ASSERT_MTX_NEAR(trans_as_dense, l({{1.0, 0.0}, {3.0, 5.0}, {2.0, 0.0}}), - r::value); + ASSERT_THROW(this->mtx4->transpose(Mtx::create(this->exec).get()), + gko::DimensionMismatch); } @@ -1938,16 +2048,14 @@ TYPED_TEST(Dense, SquareMatrixCanGatherRows) // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 0}}; auto row_gathered = this->mtx5->row_gather(&permute_idxs); - // clang-format off GKO_ASSERT_MTX_NEAR(row_gathered, - l({{-2.0, 2.0, 4.5}, - {1.0, -1.0, -0.5}}), r::value); - // clang-format on + l({{-2.0, 2.0, 
4.5}, {1.0, -1.0, -0.5}}), 0.0); } @@ -1959,17 +2067,49 @@ TYPED_TEST(Dense, SquareMatrixCanGatherRowsIntoDense) // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 0}}; auto row_gathered = Mtx::create(exec, gko::dim<2>{2, 3}); this->mtx5->row_gather(&permute_idxs, row_gathered.get()); - // clang-format off GKO_ASSERT_MTX_NEAR(row_gathered, - l({{-2.0, 2.0, 4.5}, - {1.0, -1.0, -0.5}}), r::value); + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixCanGatherRowsIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on + + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_gathered = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {1, 3}) + ->row_gather(&permute_idxs, row_gathered.get()); + + GKO_ASSERT_MTX_NEAR(row_gathered, l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); + ASSERT_EQ(row_gathered->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + + ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); } @@ -1981,16 +2121,14 @@ TYPED_TEST(Dense, SquareMatrixCanGatherRows64) // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 0}}; auto row_gathered = this->mtx5->row_gather(&permute_idxs); - // clang-format off GKO_ASSERT_MTX_NEAR(row_gathered, - l({{-2.0, 2.0, 
4.5}, - {1.0, -1.0, -0.5}}), r::value); - // clang-format on + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); } @@ -2002,17 +2140,49 @@ TYPED_TEST(Dense, SquareMatrixCanGatherRowsIntoDense64) // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 0}}; auto row_gathered = Mtx::create(exec, gko::dim<2>{2, 3}); this->mtx5->row_gather(&permute_idxs, row_gathered.get()); - // clang-format off GKO_ASSERT_MTX_NEAR(row_gathered, - l({{-2.0, 2.0, 4.5}, - {1.0, -1.0, -0.5}}), r::value); + l({{-2.0, 2.0, 4.5}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixCanGatherRowsIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on + + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_gathered = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {1, 3}) + ->row_gather(&permute_idxs, row_gathered.get()); + + GKO_ASSERT_MTX_NEAR(row_gathered, l({{2.0, 4.5}, {-1.0, -0.5}}), 0.0); + ASSERT_EQ(row_gathered->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixGatherRowsIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + + ASSERT_THROW(this->mtx5->row_gather(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); } @@ -2032,11 +2202,11 @@ TYPED_TEST(Dense, SquareMatrixIsPermutable) ->column_permute(&permute_idxs)); auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, 
SquareMatrixIsInversePermutable) +TYPED_TEST(Dense, SquareMatrixIsPermutableIntoDense) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2046,17 +2216,18 @@ TYPED_TEST(Dense, SquareMatrixIsInversePermutable) using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto ref_permuted = gko::as( - gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) - ->inverse_column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + this->mtx5->permute(&permute_idxs, permuted.get()); - GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, SquareMatrixIsPermutable64) +TYPED_TEST(Dense, SquareSubmatrixIsPermutableIntoDense) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2065,18 +2236,54 @@ TYPED_TEST(Dense, SquareMatrixIsPermutable64) // clang-format on using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); - gko::Array permute_idxs{exec, {1, 2, 0}}; + gko::Array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); auto ref_permuted = - gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + gko::as(gko::as(mtx->row_permute(&permute_idxs)) ->column_permute(&permute_idxs)); - auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); + mtx->permute(&permute_idxs, permuted.get()); - GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, SquareMatrixIsInversePermutable64) +TYPED_TEST(Dense, NonSquareMatrixPermuteIntoDenseFails) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = 
this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone().get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + + ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone().get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsInversePermutable) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2085,18 +2292,18 @@ TYPED_TEST(Dense, SquareMatrixIsInversePermutable64) // clang-format on using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); - gko::Array permute_idxs{exec, {1, 2, 0}}; + gko::Array permute_idxs{exec, {1, 2, 0}}; auto ref_permuted = gko::as( gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) ->inverse_column_permute(&permute_idxs)); auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); - GKO_ASSERT_MTX_NEAR(ref_permuted, ref_permuted, r::value); + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, SquareMatrixIsRowPermutable) +TYPED_TEST(Dense, SquareMatrixIsInversePermutableIntoDense) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2106,84 +2313,78 @@ TYPED_TEST(Dense, SquareMatrixIsRowPermutable) using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto row_permute = this->mtx5->row_permute(&permute_idxs); - auto row_permute_dense = 
static_cast(row_permute.get()); + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + this->mtx5->inverse_permute(&permute_idxs, permuted.get()); - // clang-format off - GKO_ASSERT_MTX_NEAR(row_permute_dense, - l({{-2.0, 2.0, 4.5}, - {2.1, 3.4, 1.2}, - {1.0, -1.0, -0.5}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable) +TYPED_TEST(Dense, SquareSubmatrixIsInversePermutableIntoDense) { // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); + auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); - auto row_permute = this->mtx4->row_permute(&permute_idxs); - auto row_permute_dense = static_cast(row_permute.get()); + auto ref_permuted = + gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + mtx->inverse_permute(&permute_idxs, permuted.get()); - // clang-format off - GKO_ASSERT_MTX_NEAR(row_permute_dense, - l({{0.0, 5.0, 0.0}, - {1.0, 3.0, 2.0}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, SquareMatrixIsColPermutable) +TYPED_TEST(Dense, NonSquareMatrixInversePermuteIntoDenseFails) { - // clang-format off - // {1.0, -1.0, -0.5}, - // {-2.0, 2.0, 4.5}, - // {2.1, 3.4, 1.2} - // clang-format on using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx5->get_executor(); + auto exec = this->mtx4->get_executor(); gko::Array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = this->mtx5->column_permute(&permute_idxs); - auto c_permute_dense = 
static_cast(c_permute.get()); + ASSERT_THROW( + this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone().get()), + gko::DimensionMismatch); +} + - // clang-format off - GKO_ASSERT_MTX_NEAR(c_permute_dense, - l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), r::value); - // clang-format on +TYPED_TEST(Dense, + SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {0, 1}}; + + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone().get()), + gko::ValueMismatch); } -TYPED_TEST(Dense, NonSquareMatrixIsColPermutable) +TYPED_TEST(Dense, SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions) { - // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} - // clang-format on using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); + auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 2, 0}}; - auto c_permute = this->mtx4->column_permute(&permute_idxs); - auto c_permute_dense = static_cast(c_permute.get()); - - // clang-format off - GKO_ASSERT_MTX_NEAR(c_permute_dense, - l({{3.0, 2.0, 1.0}, - {5.0, 0.0, 0.0}}), - r::value); - // clang-format on + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); } -TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable) +TYPED_TEST(Dense, SquareMatrixIsPermutable64) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2192,46 +2393,39 @@ TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable) // clang-format on using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); - gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::Array permute_idxs{exec, {1, 2, 0}}; - auto inverse_row_permute = - this->mtx5->inverse_row_permute(&inverse_permute_idxs); - auto inverse_row_permute_dense = - static_cast(inverse_row_permute.get()); + auto ref_permuted = + 
gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->permute(&permute_idxs)); - // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_row_permute_dense, - l({{2.1, 3.4, 1.2}, - {1.0, -1.0, -0.5}, - {-2.0, 2.0, 4.5}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable) +TYPED_TEST(Dense, SquareMatrixIsPermutableIntoDense64) { // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::Array inverse_permute_idxs{exec, {1, 0}}; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); - auto inverse_row_permute = - this->mtx4->inverse_row_permute(&inverse_permute_idxs); - auto inverse_row_permute_dense = - static_cast(inverse_row_permute.get()); + auto ref_permuted = + gko::as(gko::as(this->mtx5->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + this->mtx5->permute(&permute_idxs, permuted.get()); - // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_row_permute_dense, - l({{0.0, 5.0, 0.0}, - {1.0, 3.0, 2.0}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); } -TYPED_TEST(Dense, SquareMatrixIsInverseColPermutable) +TYPED_TEST(Dense, SquareSubmatrixIsPermutableIntoDense64) { // clang-format off // {1.0, -1.0, -0.5}, @@ -2240,107 +2434,927 @@ TYPED_TEST(Dense, SquareMatrixIsInverseColPermutable) // clang-format on using Mtx = typename TestFixture::Mtx; auto exec = this->mtx5->get_executor(); - gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::Array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 
3}); - auto inverse_c_permute = - this->mtx5->inverse_column_permute(&inverse_permute_idxs); - auto inverse_c_permute_dense = static_cast(inverse_c_permute.get()); + auto ref_permuted = + gko::as(gko::as(mtx->row_permute(&permute_idxs)) + ->column_permute(&permute_idxs)); + mtx->permute(&permute_idxs, permuted.get()); - // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_c_permute_dense, - l({{-0.5, 1.0, -1.0}, - {4.5, -2.0, 2.0}, - {1.2, 2.1, 3.4}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseColPermutable) +TYPED_TEST(Dense, NonSquareMatrixPermuteIntoDenseFails64) { - // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} - // clang-format on using Mtx = typename TestFixture::Mtx; auto exec = this->mtx4->get_executor(); - gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + gko::Array permute_idxs{exec, {1, 2, 0}}; - auto inverse_c_permute = - this->mtx4->inverse_column_permute(&inverse_permute_idxs); - auto inverse_c_permute_dense = static_cast(inverse_c_permute.get()); + ASSERT_THROW(this->mtx4->permute(&permute_idxs, this->mtx4->clone().get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + + ASSERT_THROW(this->mtx5->permute(&permute_idxs, this->mtx5->clone().get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixPermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx5->permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsInversePermutable64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 
3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + auto permuted = gko::as(this->mtx5->inverse_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsInversePermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto permuted = Mtx::create(exec, this->mtx5->get_size()); + + auto ref_permuted = gko::as( + gko::as(this->mtx5->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + this->mtx5->inverse_permute(&permute_idxs, permuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsInversePermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto permuted = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + auto mtx = this->mtx5->create_submatrix({0, 2}, {1, 3}); + + auto ref_permuted = + gko::as(gko::as(mtx->inverse_row_permute(&permute_idxs)) + ->inverse_column_permute(&permute_idxs)); + mtx->inverse_permute(&permute_idxs, permuted.get()); + + GKO_ASSERT_MTX_NEAR(permuted, ref_permuted, 0.0); + ASSERT_EQ(permuted->get_stride(), 4); +} + + +TYPED_TEST(Dense, NonSquareMatrixInversePermuteIntoDenseFails64) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + 
this->mtx4->inverse_permute(&permute_idxs, this->mtx4->clone().get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, + SquareMatrixInversePermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, this->mtx5->clone().get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixInversePermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->inverse_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsRowPermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + + auto row_permute = gko::as(this->mtx4->row_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsRowPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = 
typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsRowPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->row_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsColPermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = 
typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsColPermutable) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(c_permute, l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsColPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsColPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); +} + + 
+TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->column_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + + auto inverse_row_permute = + gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + inverse_row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 0}}; + + auto inverse_row_permute = + gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR(inverse_row_permute, + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, 
-0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->inverse_row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsInverseRowPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->inverse_row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW( + this->mtx5->inverse_row_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, 
SquareMatrixIsInverseColPermutable) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + + auto inverse_c_permute = + gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + inverse_c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsInverseColPermutable) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + + auto inverse_c_permute = + gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR(inverse_c_permute, + l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsInverseColPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->inverse_column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsInverseColPermutableIntoDense) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array 
permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW( + this->mtx5->inverse_column_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, + Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsRowPermutable64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto row_permute = gko::as(this->mtx5->row_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable64) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + + auto row_permute = 
gko::as(this->mtx4->row_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsRowPermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{-2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}}), 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsRowPermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW(this->mtx5->row_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixRowPermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + 
gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->row_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsColPermutable64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto c_permute = gko::as(this->mtx5->column_permute(&permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + 0.0); +} + + +TYPED_TEST(Dense, NonSquareMatrixIsColPermutable64) +{ + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx4->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + auto c_permute = gko::as(this->mtx4->column_permute(&permute_idxs)); + GKO_ASSERT_MTX_NEAR(c_permute, l({{3.0, 2.0, 1.0}, {5.0, 0.0, 0.0}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsColPermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + + this->mtx5->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-1.0, -0.5, 1.0}, {2.0, 4.5, -2.0}, {3.4, 1.2, 2.1}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsColPermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename 
TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW(this->mtx5->column_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixColPermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->column_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutable64) +{ // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_c_permute_dense, - l({{2.0, 1.0, 3.0}, - {0.0, 0.0, 5.0}}), - r::value); + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + + auto inverse_row_permute = + gko::as(this->mtx5->inverse_row_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + inverse_row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsRowPermutable64) +TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable64) { // clang-format off // 
{1.0, 3.0, 2.0}, // {0.0, 5.0, 0.0} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx4->get_executor(); - gko::Array permute_idxs{exec, {1, 0}}; + gko::Array inverse_permute_idxs{exec, {1, 0}}; - auto row_permute = this->mtx4->row_permute(&permute_idxs); - auto row_permute_dense = static_cast(row_permute.get()); + auto inverse_row_permute = + gko::as(this->mtx4->inverse_row_permute(&inverse_permute_idxs)); - // clang-format off - GKO_ASSERT_MTX_NEAR(row_permute_dense, - l({{0.0, 5.0, 0.0}, - {1.0, 3.0, 2.0}}), r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR(inverse_row_permute, + l({{0.0, 5.0, 0.0}, {1.0, 3.0, 2.0}}), 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsColPermutable64) +TYPED_TEST(Dense, SquareMatrixIsInverseRowPermutableIntoDense64) { // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); gko::Array permute_idxs{exec, {1, 2, 0}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); - auto c_permute = this->mtx4->column_permute(&permute_idxs); - auto c_permute_dense = static_cast(c_permute.get()); + this->mtx5->inverse_row_permute(&permute_idxs, row_permute.get()); - // clang-format off - GKO_ASSERT_MTX_NEAR(c_permute_dense, - l({{3.0, 2.0, 1.0}, - {5.0, 0.0, 0.0}}), - r::value); - // clang-format on + GKO_ASSERT_MTX_NEAR( + row_permute, + l({{2.1, 3.4, 1.2}, {1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}}), 0.0); } -TYPED_TEST(Dense, NonSquareMatrixIsInverseRowPermutable64) +TYPED_TEST(Dense, SquareSubmatrixIsInverseRowPermutableIntoDense64) { // clang-format off - // {1.0, 3.0, 2.0}, - // {0.0, 5.0, 0.0} + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on using Mtx = typename 
TestFixture::Mtx; - auto exec = this->mtx4->get_executor(); - gko::Array inverse_permute_idxs{exec, {1, 0}}; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto row_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); - auto inverse_row_permute = - this->mtx4->inverse_row_permute(&inverse_permute_idxs); - auto inverse_row_permute_dense = - static_cast(inverse_row_permute.get()); + this->mtx5->create_submatrix({0, 2}, {0, 2}) + ->inverse_row_permute(&permute_idxs, row_permute.get()); + + GKO_ASSERT_MTX_NEAR(row_permute, l({{-2.0, 2.0}, {1.0, -1.0}}), 0.0); + ASSERT_EQ(row_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW( + this->mtx5->inverse_row_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseRowPermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW( + this->mtx5->inverse_row_permute(&permute_idxs, Mtx::create(exec).get()), + gko::DimensionMismatch); +} + +TYPED_TEST(Dense, SquareMatrixIsInverseColPermutable64) +{ // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_row_permute_dense, - l({{0.0, 5.0, 0.0}, - {1.0, 3.0, 2.0}}), r::value); + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; + + auto 
inverse_c_permute = + gko::as(this->mtx5->inverse_column_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR( + inverse_c_permute, + l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), 0.0); } @@ -2351,19 +3365,86 @@ TYPED_TEST(Dense, NonSquareMatrixIsInverseColPermutable64) // {0.0, 5.0, 0.0} // clang-format on using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; auto exec = this->mtx4->get_executor(); gko::Array inverse_permute_idxs{exec, {1, 2, 0}}; auto inverse_c_permute = - this->mtx4->inverse_column_permute(&inverse_permute_idxs); - auto inverse_c_permute_dense = static_cast(inverse_c_permute.get()); + gko::as(this->mtx4->inverse_column_permute(&inverse_permute_idxs)); + + GKO_ASSERT_MTX_NEAR(inverse_c_permute, + l({{2.0, 1.0, 3.0}, {0.0, 0.0, 5.0}}), 0.0); +} + + +TYPED_TEST(Dense, SquareMatrixIsInverseColPermutableIntoDense64) +{ + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + auto c_permute = Mtx::create(exec, this->mtx5->get_size()); + this->mtx5->inverse_column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR( + c_permute, l({{-0.5, 1.0, -1.0}, {4.5, -2.0, 2.0}, {1.2, 2.1, 3.4}}), + 0.0); +} + + +TYPED_TEST(Dense, SquareSubmatrixIsInverseColPermutableIntoDense64) +{ // clang-format off - GKO_ASSERT_MTX_NEAR(inverse_c_permute_dense, - l({{2.0, 1.0, 3.0}, - {0.0, 0.0, 5.0}}), - r::value); + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} // clang-format on + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 0}}; + auto c_permute = Mtx::create(exec, gko::dim<2>{2, 2}, 4); + + this->mtx5->create_submatrix({0, 2}, {0, 2}) + 
->column_permute(&permute_idxs, c_permute.get()); + + GKO_ASSERT_MTX_NEAR(c_permute, l({{-1.0, 1.0}, {2.0, -2.0}}), 0.0); + ASSERT_EQ(c_permute->get_stride(), 4); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongPermutationSize64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2}}; + auto row_permute = Mtx::create(exec, this->mtx5->get_size()); + + ASSERT_THROW( + this->mtx5->inverse_column_permute(&permute_idxs, row_permute.get()), + gko::ValueMismatch); +} + + +TYPED_TEST(Dense, + SquareMatrixInverseColPermuteIntoDenseFailsForWrongDimensions64) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = this->mtx5->get_executor(); + gko::Array permute_idxs{exec, {1, 2, 0}}; + + ASSERT_THROW(this->mtx5->inverse_column_permute(&permute_idxs, + Mtx::create(exec).get()), + gko::DimensionMismatch); } @@ -2421,6 +3502,66 @@ TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrix) } +TYPED_TEST(Dense, ExtractsDiagonalFromSquareMatrixIntoDiagonal) +{ + using T = typename TestFixture::value_type; + + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + auto diag = gko::matrix::Diagonal::create(this->exec, 3); + + this->mtx5->extract_diagonal(diag.get()); + + ASSERT_EQ(diag->get_size()[0], 3); + ASSERT_EQ(diag->get_size()[1], 3); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); + ASSERT_EQ(diag->get_values()[2], T{1.2}); +} + + +TYPED_TEST(Dense, ExtractsDiagonalFromTallSkinnyMatrixIntoDiagonal) +{ + using T = typename TestFixture::value_type; + + // clang-format off + // {1.0, 3.0, 2.0}, + // {0.0, 5.0, 0.0} + // clang-format on + auto diag = gko::matrix::Diagonal::create(this->exec, 2); + + this->mtx4->extract_diagonal(diag.get()); + + ASSERT_EQ(diag->get_size()[0], 2); + 
ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{5.}); +} + + +TYPED_TEST(Dense, ExtractsDiagonalFromShortFatMatrixIntoDiagonal) +{ + using T = typename TestFixture::value_type; + + // clang-format off + // { 1.0, -1.0}, + // {-2.0, 2.0}, + // {-3.0, 3.0} + // clang-format on + auto diag = gko::matrix::Diagonal::create(this->exec, 2); + + this->mtx8->extract_diagonal(diag.get()); + + ASSERT_EQ(diag->get_size()[0], 2); + ASSERT_EQ(diag->get_size()[1], 2); + ASSERT_EQ(diag->get_values()[0], T{1.}); + ASSERT_EQ(diag->get_values()[1], T{2.}); +} + + TYPED_TEST(Dense, InplaceAbsolute) { using T = typename TestFixture::value_type; @@ -2432,9 +3573,9 @@ TYPED_TEST(Dense, InplaceAbsolute) this->mtx5->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(this->mtx5, - l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), - r::value); + GKO_ASSERT_MTX_NEAR( + this->mtx5, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } @@ -2451,9 +3592,9 @@ TYPED_TEST(Dense, InplaceAbsoluteSubMatrix) mtx->compute_absolute_inplace(); - GKO_ASSERT_MTX_NEAR(this->mtx5, - l({{1.0, 1.0, -0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), - r::value); + GKO_ASSERT_MTX_NEAR( + this->mtx5, l({{1.0, 1.0, -0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } @@ -2468,9 +3609,29 @@ TYPED_TEST(Dense, OutplaceAbsolute) auto abs_mtx = this->mtx5->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs_mtx, - l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), - r::value); + GKO_ASSERT_MTX_NEAR( + abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); +} + + +TYPED_TEST(Dense, OutplaceAbsoluteIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + auto abs_mtx = + gko::remove_complex::create(this->exec, this->mtx5->get_size()); + + 
this->mtx5->compute_absolute(abs_mtx.get()); + + GKO_ASSERT_MTX_NEAR( + abs_mtx, l({{1.0, 1.0, 0.5}, {2.0, 2.0, 4.5}, {2.1, 3.4, 1.2}}), + 0.0); } @@ -2487,12 +3648,31 @@ TYPED_TEST(Dense, OutplaceAbsoluteSubMatrix) auto abs_mtx = mtx->compute_absolute(); - GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), - r::value); + GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); GKO_ASSERT_EQ(abs_mtx->get_stride(), 2); } +TYPED_TEST(Dense, OutplaceSubmatrixAbsoluteIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + // clang-format off + // {1.0, -1.0, -0.5}, + // {-2.0, 2.0, 4.5}, + // {2.1, 3.4, 1.2} + // clang-format on + auto mtx = this->mtx5->create_submatrix(gko::span{0, 2}, gko::span{0, 2}); + auto abs_mtx = + gko::remove_complex::create(this->exec, gko::dim<2>{2, 2}, 4); + + mtx->compute_absolute(abs_mtx.get()); + + GKO_ASSERT_MTX_NEAR(abs_mtx, l({{1.0, 1.0}, {2.0, 2.0}}), 0); + GKO_ASSERT_EQ(abs_mtx->get_stride(), 4); +} + + TYPED_TEST(Dense, AppliesToComplex) { using value_type = typename TestFixture::value_type; @@ -2500,13 +3680,12 @@ TYPED_TEST(Dense, AppliesToComplex) using Vec = gko::matrix::Dense; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto b = gko::initialize( - {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, - {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, - {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, exec); - auto x = Vec::create(exec, gko::dim<2>{2,2}); - // clang-format on + auto b = + gko::initialize({{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, + {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, + {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, + exec); + auto x = Vec::create(exec, gko::dim<2>{2, 2}); this->mtx1->apply(b.get(), x.get()); @@ -2526,13 +3705,12 @@ TYPED_TEST(Dense, AppliesToMixedComplex) using Vec = gko::matrix::Dense; auto exec = gko::ReferenceExecutor::create(); - // clang-format off auto b = gko::initialize( 
{{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, - {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, exec); - auto x = Vec::create(exec, gko::dim<2>{2,2}); - // clang-format on + {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, + exec); + auto x = Vec::create(exec, gko::dim<2>{2, 2}); this->mtx1->apply(b.get(), x.get()); @@ -2552,17 +3730,17 @@ TYPED_TEST(Dense, AdvancedAppliesToComplex) using DenseComplex = gko::matrix::Dense; auto exec = gko::ReferenceExecutor::create(); - // clang-format off auto b = gko::initialize( {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}, - {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, exec); + {complex_type{3.0, 4.0}, complex_type{4.0, 5.0}}}, + exec); auto x = gko::initialize( {{complex_type{1.0, 0.0}, complex_type{2.0, 1.0}}, - {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}}, exec); + {complex_type{2.0, 2.0}, complex_type{3.0, 3.0}}}, + exec); auto alpha = gko::initialize({-1.0}, this->exec); auto beta = gko::initialize({2.0}, this->exec); - // clang-format on this->mtx1->apply(alpha.get(), b.get(), beta.get(), x.get()); @@ -2583,17 +3761,17 @@ TYPED_TEST(Dense, AdvancedAppliesToMixedComplex) using MixedDenseComplex = gko::matrix::Dense; auto exec = gko::ReferenceExecutor::create(); - // clang-format off auto b = gko::initialize( {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}, - {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, exec); + {mixed_complex_type{3.0, 4.0}, mixed_complex_type{4.0, 5.0}}}, + exec); auto x = gko::initialize( {{mixed_complex_type{1.0, 0.0}, mixed_complex_type{2.0, 1.0}}, - {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}}, exec); + {mixed_complex_type{2.0, 2.0}, mixed_complex_type{3.0, 3.0}}}, + exec); auto alpha = gko::initialize({-1.0}, 
this->exec); auto beta = gko::initialize({2.0}, this->exec); - // clang-format on this->mtx1->apply(alpha.get(), b.get(), beta.get(), x.get()); @@ -2617,11 +3795,11 @@ TYPED_TEST(Dense, MakeComplex) auto complex_mtx = this->mtx5->make_complex(); - GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0); + GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); } -TYPED_TEST(Dense, MakeComplexWithGivenResult) +TYPED_TEST(Dense, MakeComplexIntoDense) { using T = typename TestFixture::value_type; using ComplexMtx = typename TestFixture::ComplexMtx; @@ -2635,11 +3813,11 @@ TYPED_TEST(Dense, MakeComplexWithGivenResult) auto complex_mtx = ComplexMtx::create(exec, this->mtx5->get_size()); this->mtx5->make_complex(complex_mtx.get()); - GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0); + GKO_ASSERT_MTX_NEAR(complex_mtx, this->mtx5, 0.0); } -TYPED_TEST(Dense, MakeComplexWithGivenResultFailsForWrongDimensions) +TYPED_TEST(Dense, MakeComplexIntoDenseFailsForWrongDimensions) { using T = typename TestFixture::value_type; using ComplexMtx = typename TestFixture::ComplexMtx; @@ -2668,11 +3846,11 @@ TYPED_TEST(Dense, GetReal) auto real_mtx = this->mtx5->get_real(); - GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0); + GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); } -TYPED_TEST(Dense, GetRealWithGivenResult) +TYPED_TEST(Dense, GetRealIntoDense) { using T = typename TestFixture::value_type; using RealMtx = typename TestFixture::RealMtx; @@ -2686,11 +3864,11 @@ TYPED_TEST(Dense, GetRealWithGivenResult) auto real_mtx = RealMtx::create(exec, this->mtx5->get_size()); this->mtx5->get_real(real_mtx.get()); - GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0); + GKO_ASSERT_MTX_NEAR(real_mtx, this->mtx5, 0.0); } -TYPED_TEST(Dense, GetRealWithGivenResultFailsForWrongDimensions) +TYPED_TEST(Dense, GetRealIntoDenseFailsForWrongDimensions) { using T = typename TestFixture::value_type; using RealMtx = typename TestFixture::RealMtx; @@ -2718,11 +3896,12 @@ TYPED_TEST(Dense, GetImag) auto imag_mtx = 
this->mtx5->get_imag(); GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), 0); + imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), + 0.0); } -TYPED_TEST(Dense, GetImagWithGivenResult) +TYPED_TEST(Dense, GetImagIntoDense) { using T = typename TestFixture::value_type; using RealMtx = typename TestFixture::RealMtx; @@ -2737,11 +3916,12 @@ TYPED_TEST(Dense, GetImagWithGivenResult) this->mtx5->get_imag(imag_mtx.get()); GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), 0); + imag_mtx, l({{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}), + 0.0); } -TYPED_TEST(Dense, GetImagWithGivenResultFailsForWrongDimensions) +TYPED_TEST(Dense, GetImagIntoDenseFailsForWrongDimensions) { using T = typename TestFixture::value_type; using RealMtx = typename TestFixture::RealMtx; @@ -2823,12 +4003,31 @@ TYPED_TEST(DenseComplex, NonSquareMatrixIsConjugateTransposable) {T{1.0, 0.0}, T{0.0, 1.0}}}, exec); - auto trans = mtx->conj_transpose(); - auto trans_as_dense = static_cast(trans.get()); + auto trans = gko::as(mtx->conj_transpose()); + + GKO_ASSERT_MTX_NEAR(trans, + l({{T{1.0, -2.0}, T{-2.0, -1.5}, T{1.0, 0.0}}, + {T{-1.0, -2.1}, T{4.5, 0.0}, T{0.0, -1.0}}}), + 0.0); +} + + +TYPED_TEST(DenseComplex, NonSquareMatrixIsConjugateTransposableIntoDense) +{ + using Dense = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = gko::ReferenceExecutor::create(); + auto mtx = gko::initialize({{T{1.0, 2.0}, T{-1.0, 2.1}}, + {T{-2.0, 1.5}, T{4.5, 0.0}}, + {T{1.0, 0.0}, T{0.0, 1.0}}}, + exec); + auto trans = Dense::create(exec, gko::transpose(mtx->get_size())); + + mtx->conj_transpose(trans.get()); - GKO_ASSERT_MTX_NEAR(trans_as_dense, - l({{T{1.0, -2.0}, T{-2.0, -1.5}, T{1.0, 0.0}}, - {T{-1.0, -2.1}, T{4.5, 0.0}, T{0.0, -1.0}}}), + GKO_ASSERT_MTX_NEAR(trans, + l({{T{1.0, -2.0}, T{-2.0, -1.5}, T{1.0, 0.0}}, + {T{-1.0, -2.1}, T{4.5, 0.0}, T{0.0, -1.0}}}), 0.0); } @@ -2838,17 
+4037,15 @@ TYPED_TEST(DenseComplex, InplaceAbsolute) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); mtx->compute_absolute_inplace(); GKO_ASSERT_MTX_NEAR( - mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); + mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); } @@ -2857,17 +4054,35 @@ TYPED_TEST(DenseComplex, OutplaceAbsolute) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto abs_mtx = mtx->compute_absolute(); GKO_ASSERT_MTX_NEAR( - abs_mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), 0.0); + abs_mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), + 0.0); +} + + +TYPED_TEST(DenseComplex, OutplaceAbsoluteIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = gko::ReferenceExecutor::create(); + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); + auto abs_mtx = gko::remove_complex::create(exec, mtx->get_size()); + + 
mtx->compute_absolute(abs_mtx.get()); + + GKO_ASSERT_MTX_NEAR( + abs_mtx, l({{1.0, 5.0, 2.0}, {5.0, 1.0, 0.0}, {0.0, 1.5, 2.0}}), + 0.0); } @@ -2876,12 +4091,10 @@ TYPED_TEST(DenseComplex, MakeComplex) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto complex_mtx = mtx->make_complex(); @@ -2889,17 +4102,15 @@ TYPED_TEST(DenseComplex, MakeComplex) } -TYPED_TEST(DenseComplex, MakeComplexWithGivenResult) +TYPED_TEST(DenseComplex, MakeComplexIntoDense) { using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto complex_mtx = Mtx::create(exec, mtx->get_size()); mtx->make_complex(complex_mtx.get()); @@ -2913,39 +4124,35 @@ TYPED_TEST(DenseComplex, GetReal) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + 
{T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto real_mtx = mtx->get_real(); GKO_ASSERT_MTX_NEAR( - real_mtx, l({{1.0, 3.0, 0.0}, {-4.0, -1.0, 0.0}, {0.0, 0.0, 2.0}}), + real_mtx, l({{1.0, 3.0, 0.0}, {-4.0, -1.0, 0.0}, {0.0, 0.0, 2.0}}), 0.0); } -TYPED_TEST(DenseComplex, GetRealWithGivenResult) +TYPED_TEST(DenseComplex, GetRealIntoDense) { using Mtx = typename TestFixture::Mtx; using RealMtx = typename TestFixture::RealMtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto real_mtx = RealMtx::create(exec, mtx->get_size()); mtx->get_real(real_mtx.get()); GKO_ASSERT_MTX_NEAR( - real_mtx, l({{1.0, 3.0, 0.0}, {-4.0, -1.0, 0.0}, {0.0, 0.0, 2.0}}), + real_mtx, l({{1.0, 3.0, 0.0}, {-4.0, -1.0, 0.0}, {0.0, 0.0, 2.0}}), 0.0); } @@ -2955,39 +4162,35 @@ TYPED_TEST(DenseComplex, GetImag) using Mtx = typename TestFixture::Mtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto imag_mtx = mtx->get_imag(); GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 4.0, 2.0}, {-3.0, 0.0, 0.0}, {0.0, -1.5, 0.0}}), + imag_mtx, l({{0.0, 4.0, 2.0}, {-3.0, 0.0, 0.0}, {0.0, -1.5, 0.0}}), 0.0); } 
-TYPED_TEST(DenseComplex, GetImagWithGivenResult) +TYPED_TEST(DenseComplex, GetImagIntoDense) { using Mtx = typename TestFixture::Mtx; using RealMtx = typename TestFixture::RealMtx; using T = typename TestFixture::value_type; auto exec = gko::ReferenceExecutor::create(); - // clang-format off - auto mtx = gko::initialize( - {{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, - {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, - {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, exec); - // clang-format on + auto mtx = gko::initialize({{T{1.0, 0.0}, T{3.0, 4.0}, T{0.0, 2.0}}, + {T{-4.0, -3.0}, T{-1.0, 0}, T{0.0, 0.0}}, + {T{0.0, 0.0}, T{0.0, -1.5}, T{2.0, 0.0}}}, + exec); auto imag_mtx = RealMtx::create(exec, mtx->get_size()); mtx->get_imag(imag_mtx.get()); GKO_ASSERT_MTX_NEAR( - imag_mtx, l({{0.0, 4.0, 2.0}, {-3.0, 0.0, 0.0}, {0.0, -1.5, 0.0}}), + imag_mtx, l({{0.0, 4.0, 2.0}, {-3.0, 0.0, 0.0}, {0.0, -1.5, 0.0}}), 0.0); } diff --git a/reference/test/matrix/identity.cpp b/reference/test/matrix/identity.cpp index 04e032d4b33..9e59e88c0ce 100644 --- a/reference/test/matrix/identity.cpp +++ b/reference/test/matrix/identity.cpp @@ -78,6 +78,22 @@ TYPED_TEST(Identity, AppliesToVector) } +TYPED_TEST(Identity, AppliesToMultipleVectors) +{ + using Id = typename TestFixture::Id; + using Vec = typename TestFixture::Vec; + using T = typename TestFixture::value_type; + auto identity = Id::create(this->exec, 3); + auto x = Vec::create(this->exec, gko::dim<2>{3, 2}, 3); + auto b = gko::initialize( + 3, {I{2.0, 3.0}, I{1.0, 2.0}, I{5.0, -1.0}}, this->exec); + + identity->apply(b.get(), x.get()); + + GKO_ASSERT_MTX_NEAR(x, l({{2.0, 3.0}, {1.0, 2.0}, {5.0, -1.0}}), 0.0); +} + + TYPED_TEST(Identity, AppliesToMixedVector) { using Id = typename TestFixture::Id;