diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 65a5f4f3d88..69af1c9530f 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -9,7 +9,8 @@ set(SOURCES preconditioner/block_jacobi.cpp solver/bicgstab.cpp solver/fcg.cpp - solver/cg.cpp) + solver/cg.cpp + solver/tfqmr.cpp) add_subdirectory(devices) # basic device functionalities, always compiled add_subdirectory(device_hooks) # placeholders for disabled modules diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c48d04d92bb..96d32f8b593 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -40,6 +40,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/bicgstab_kernels.hpp" #include "core/solver/cg_kernels.hpp" #include "core/solver/fcg_kernels.hpp" +#include "core/solver/tfqmr_kernels.hpp" #ifndef GKO_HOOK_MODULE @@ -198,6 +199,53 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); } // namespace bicgstab +namespace tfqmr { + + +template +GKO_DECLARE_TFQMR_INITIALIZE_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_INITIALIZE_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_1_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_1_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_2_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_2_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_3_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_3_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_4_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_4_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_5_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_5_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_6_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_6_KERNEL); + +template +GKO_DECLARE_TFQMR_STEP_7_KERNEL(ValueType) +NOT_COMPILED(GKO_HOOK_MODULE); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_7_KERNEL); + + +} // namespace tfqmr + + namespace csr { diff --git a/core/solver/tfqmr.cpp b/core/solver/tfqmr.cpp new file mode 100644 index 00000000000..1b15612ca99 --- /dev/null +++ b/core/solver/tfqmr.cpp @@ -0,0 +1,248 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include "core/solver/tfqmr.hpp"
+
+
+#include "core/base/exception.hpp"
+#include "core/base/exception_helpers.hpp"
+#include "core/base/executor.hpp"
+#include "core/base/math.hpp"
+#include "core/base/utils.hpp"
+#include "core/solver/tfqmr_kernels.hpp"
+
+
+namespace gko {
+namespace solver {
+namespace {
+
+
+template <typename ValueType>
+struct TemplatedOperation {
+    GKO_REGISTER_OPERATION(initialize, tfqmr::initialize);
+    GKO_REGISTER_OPERATION(step_1, tfqmr::step_1);
+    GKO_REGISTER_OPERATION(step_2, tfqmr::step_2);
+    GKO_REGISTER_OPERATION(step_3, tfqmr::step_3);
+    GKO_REGISTER_OPERATION(step_4, tfqmr::step_4);
+    GKO_REGISTER_OPERATION(step_5, tfqmr::step_5);
+    GKO_REGISTER_OPERATION(step_6, tfqmr::step_6);
+    GKO_REGISTER_OPERATION(step_7, tfqmr::step_7);
+};
+
+
+/**
+ * Checks whether the required residual goal has been reached.
+ *
+ * @param tau Residual of the iteration.
+ * @param orig_tau Original residual.
+ * @param r Relative residual goal.
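+ *
+ * @note The check requires abs(tau) < r * abs(orig_tau) to hold for every
+ *       entry that is compared; if any entry still violates this bound the
+ *       function returns false and the solver keeps iterating.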
+ */
+template <typename ValueType>
+bool has_converged(const matrix::Dense<ValueType> *tau,
+                   const matrix::Dense<ValueType> *orig_tau,
+                   remove_complex<ValueType> r)
+{
+    using std::abs;
+    for (size_type i = 0; i < tau->get_num_rows(); ++i) {
+        if (!(abs(tau->at(i, 0)) < r * abs(orig_tau->at(i, 0)))) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+}  // namespace
+
+
+template <typename ValueType>
+void Tfqmr<ValueType>::apply(const LinOp *b, LinOp *x) const
+{
+    using std::swap;
+    using Vector = matrix::Dense<ValueType>;
+    ASSERT_CONFORMANT(system_matrix_, b);
+    ASSERT_EQUAL_DIMENSIONS(b, x);
+
+    auto exec = this->get_executor();
+
+    auto one_op = initialize<Vector>({one<ValueType>()}, exec);
+    auto neg_one_op = initialize<Vector>({-one<ValueType>()}, exec);
+
+    auto dense_b = as<const Vector>(b);
+    auto dense_x = as<Vector>(x);
+    auto r = Vector::create_with_config_of(dense_b);
+    auto r0 = Vector::create_with_config_of(dense_b);
+    auto d = Vector::create_with_config_of(dense_b);
+    auto v = Vector::create_with_config_of(dense_b);
+    auto u_m = Vector::create_with_config_of(dense_b);
+    auto u_mp1 = Vector::create_with_config_of(dense_b);
+    auto w = Vector::create_with_config_of(dense_b);
+    auto Ad = Vector::create_with_config_of(dense_b);
+    auto Au = Vector::create_with_config_of(dense_b);
+    auto Au_new = Vector::create_with_config_of(dense_b);
+    auto pu_m = Vector::create_with_config_of(dense_b);
+
+    auto alpha = Vector::create(exec, 1, dense_b->get_num_cols());
+    auto beta = Vector::create_with_config_of(alpha.get());
+    auto sigma = Vector::create_with_config_of(alpha.get());
+    auto rho_old = Vector::create_with_config_of(alpha.get());
+    auto rho = Vector::create_with_config_of(alpha.get());
+    auto taut = Vector::create_with_config_of(alpha.get());
+    auto tau = Vector::create_with_config_of(alpha.get());
+    auto nomw = Vector::create_with_config_of(alpha.get());
+    auto theta = Vector::create_with_config_of(alpha.get());
+    auto eta = Vector::create_with_config_of(alpha.get());
+    auto rov = Vector::create_with_config_of(alpha.get());
+
+    auto master_tau =
+        Vector::create(exec->get_master(), 1, dense_b->get_num_cols());
+    auto starting_tau = Vector::create_with_config_of(master_tau.get());
+
+    // TODO: replace this with automatic merged kernel generator
+    exec->run(TemplatedOperation<ValueType>::make_initialize_operation(
+        dense_b, r.get(), r0.get(), u_m.get(), u_mp1.get(), pu_m.get(),
+        Au.get(), Ad.get(), w.get(), v.get(), d.get(), taut.get(),
+        rho_old.get(), rho.get(), alpha.get(), beta.get(), tau.get(),
+        sigma.get(), rov.get(), eta.get(), nomw.get(), theta.get()));
+    // r = dense_b
+    // r0 = u_m = w = r
+    // Ad = d = 0
+    // theta = eta = rov = alpha = beta = sigma = 1.0
+
+    system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get());
+    r0->copy_from(r.get());
+    r->compute_dot(r.get(), tau.get());
+    w->copy_from(r.get());
+    u_m->copy_from(r.get());
+    starting_tau->copy_from(tau.get());
+    rho->copy_from(tau.get());
+    rho_old->copy_from(tau.get());
+    taut->copy_from(tau.get());
+    preconditioner_->apply(u_m.get(), pu_m.get());
+    system_matrix_->apply(pu_m.get(), v.get());
+    Au->copy_from(v.get());
+    Ad->copy_from(d.get());
+
+    for (int iter = 0; iter < max_iters_; ++iter) {
+        if (iter % 2 == 0) {
+            r0->compute_dot(v.get(), rov.get());
+            exec->run(TemplatedOperation<ValueType>::make_step_1_operation(
+                alpha.get(), rov.get(), rho.get(), v.get(), u_m.get(),
+                u_mp1.get()));
+            // alpha = rho / rov
+            // u_mp1 = u_m - alpha * v
+        }
+        exec->run(TemplatedOperation<ValueType>::make_step_2_operation(
+            theta.get(), alpha.get(), eta.get(), sigma.get(), Au.get(),
+            pu_m.get(), w.get(), d.get(), Ad.get()));
+        // sigma = (theta^2 / alpha) * eta;
+        // w = w - alpha * Au
+        // d = pu_m + sigma * d
+        // Ad = Au + sigma * Ad
+        w->compute_dot(w.get(), nomw.get());
+        exec->run(TemplatedOperation<ValueType>::make_step_3_operation(
+            theta.get(), nomw.get(), taut.get(), eta.get(), alpha.get()));
+        // theta = nomw / taut
+        // c_mp1 = 1 / (1 + theta)
+        // taut = taut * sqrt(theta) * c_mp1;
+        // eta = c_mp1 * c_mp1 * alpha;
+        exec->run(TemplatedOperation<ValueType>::make_step_4_operation(
+            eta.get(), d.get(), Ad.get(), dense_x, r.get()));
+        // x = x + eta * d
+        // r = r - eta * Ad
+        r->compute_dot(r.get(), tau.get());
+        master_tau->copy_from(tau.get());
+        if (has_converged(master_tau.get(), starting_tau.get(),
+                          rel_residual_goal_)) {
+            break;
+        }
+        if (iter % 2 != 0) {
+            r0->compute_dot(w.get(), rho.get());
+            exec->run(TemplatedOperation<ValueType>::make_step_5_operation(
+                beta.get(), rho_old.get(), rho.get(), w.get(), u_m.get(),
+                u_mp1.get()));
+            // beta = rho / rho_old
+            // u_mp1 = w + beta * u_m
+            // this is equivalent to step_1, only with different inputs
+            rho_old->copy_from(rho.get());
+        }
+        preconditioner_->apply(u_mp1.get(), pu_m.get());
+        system_matrix_->apply(pu_m.get(), Au_new.get());
+        if (iter % 2 != 0) {
+            exec->run(TemplatedOperation<ValueType>::make_step_6_operation(
+                beta.get(), Au_new.get(), Au.get(), v.get()));
+            // v = Au_new + beta * (Au + beta * v)
+        }
+        exec->run(TemplatedOperation<ValueType>::make_step_7_operation(
+            Au_new.get(), u_mp1.get(), Au.get(), u_m.get()));
+        // Au = Au_new
+        // u_m = u_mp1
+    }
+}
+
+
+template <typename ValueType>
+void Tfqmr<ValueType>::apply(const LinOp *alpha, const LinOp *b,
+                             const LinOp *beta, LinOp *x) const
+{
+    auto dense_x = as<matrix::Dense<ValueType>>(x);
+    auto x_clone = dense_x->clone();
+    this->apply(b, x_clone.get());
+    dense_x->scale(beta);
+    dense_x->add_scaled(alpha, x_clone.get());
+}
+
+
+template <typename ValueType>
+std::unique_ptr<LinOp> TfqmrFactory<ValueType>::generate(
+    std::shared_ptr<const LinOp> base) const
+{
+    ASSERT_EQUAL_DIMENSIONS(base,
+                            size(base->get_num_cols(), base->get_num_rows()));
+    auto tfqmr = std::unique_ptr<Tfqmr<ValueType>>(Tfqmr<ValueType>::create(
+        this->get_executor(), max_iters_, rel_residual_goal_, base));
+    tfqmr->set_preconditioner(precond_factory_->generate(base));
+    return std::move(tfqmr);
+}
+
+
+#define GKO_DECLARE_TFQMR(_type) class Tfqmr<_type>
+#define GKO_DECLARE_TFQMR_FACTORY(_type) class TfqmrFactory<_type>
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR);
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_FACTORY);
+#undef GKO_DECLARE_TFQMR
+#undef GKO_DECLARE_TFQMR_FACTORY
+
+
+}  // namespace solver
+}  // namespace gko
diff --git a/core/solver/tfqmr.hpp b/core/solver/tfqmr.hpp
new file mode 100644
index 00000000000..a45f4f2c5ad
--- /dev/null
+++ b/core/solver/tfqmr.hpp
@@ -0,0 +1,193 @@
+/*************************************************************
+Copyright 2017-2018
+
+Karlsruhe Institute of Technology
+Universitat Jaume I
+University of Tennessee
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_SOLVER_TFQMR_HPP_ +#define GKO_CORE_SOLVER_TFQMR_HPP_ + + +#include "core/base/array.hpp" +#include "core/base/lin_op.hpp" +#include "core/base/math.hpp" +#include "core/base/types.hpp" +#include "core/matrix/identity.hpp" + + +namespace gko { +namespace solver { + + +template +class TfqmrFactory; + +/** + * TFQMR or the Transpose-Free Quasi-Minimal Residual is a Krylov subspace + * solver that derives from QMR by avoiding the need of transposing the system + * matrix. + * + * Being a generic solver, it is capable of solving general matrices, including + * non-s.p.d matrices. + * + * @tparam ValueType precision of the elements of the system matrix. + */ +template +class Tfqmr : public BasicLinOp>, public PreconditionedMethod { + friend class BasicLinOp; + friend class TfqmrFactory; + +public: + using BasicLinOp::convert_to; + using BasicLinOp::move_to; + + using value_type = ValueType; + + void apply(const LinOp *b, LinOp *x) const override; + + void apply(const LinOp *alpha, const LinOp *b, const LinOp *beta, + LinOp *x) const override; + + /** + * Gets the system matrix of the linear system. + * + * @return The system matrix. + */ + std::shared_ptr get_system_matrix() const + { + return system_matrix_; + } + + /** + * Gets the maximum number of iterations of the TFQMR solver. + * + * @return The maximum number of iterations. + */ + int get_max_iters() const { return max_iters_; } + + /** + * Gets the relative residual goal of the solver. + * + * @return The relative residual goal. + */ + remove_complex get_rel_residual_goal() const + { + return rel_residual_goal_; + } + +protected: + using BasicLinOp::create; + + explicit Tfqmr(std::shared_ptr exec) + : BasicLinOp(exec, 0, 0, 0) + {} + + Tfqmr(std::shared_ptr exec, int max_iters, + remove_complex rel_residual_goal, + std::shared_ptr system_matrix) + : BasicLinOp( + exec, system_matrix->get_num_cols(), + system_matrix->get_num_rows(), + system_matrix->get_num_rows() * system_matrix->get_num_cols()), + system_matrix_(std::move(system_matrix)), + max_iters_(max_iters), + rel_residual_goal_(rel_residual_goal) + {} + +private: + std::shared_ptr system_matrix_{}; + int max_iters_{}; + remove_complex rel_residual_goal_{}; +}; + + +/** + * Creates the TFQMR solver. + * + * @param exec The executor on which the TFQMR solver is to be created. + * @param max_iters The maximum number of iterations to be pursued. + * @param rel_residual_goal The relative residual required for + * convergence. + * + * @return The newly created TFQMR solver. 
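+ *
+ * A minimal usage sketch, mirroring the pattern used in the unit tests; the
+ * executor `exec`, the system matrix `A`, the vectors `b` and `x`, and the
+ * iteration limit / residual goal values are placeholders, not part of this
+ * patch:
+ * @code
+ * auto tfqmr_factory =
+ *     gko::solver::TfqmrFactory<>::create(exec, 100, 1e-10);
+ * auto solver = tfqmr_factory->generate(A);
+ * // overwrites x with the solution of A * x = b, using the initial
+ * // contents of x as the starting guess
+ * solver->apply(b.get(), x.get());
+ * @endcode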
+ */ +template +class TfqmrFactory : public LinOpFactory, public PreconditionedMethodFactory { +public: + using value_type = ValueType; + + static std::unique_ptr create( + std::shared_ptr exec, int max_iters, + remove_complex rel_residual_goal) + { + return std::unique_ptr( + new TfqmrFactory(std::move(exec), max_iters, rel_residual_goal)); + } + + std::unique_ptr generate( + std::shared_ptr base) const override; + + /** + * Gets the maximum number of iterations of the TFQMR solver. + * + * @return The maximum number of iterations. + */ + int get_max_iters() const { return max_iters_; } + + /** + * Gets the relative residual goal of the solver. + * + * @return The relative residual goal. + */ + remove_complex get_rel_residual_goal() const + { + return rel_residual_goal_; + } + +protected: + TfqmrFactory(std::shared_ptr exec, int max_iters, + remove_complex rel_residual_goal) + : LinOpFactory(exec), + PreconditionedMethodFactory( + matrix::IdentityFactory::create(std::move(exec))), + max_iters_(max_iters), + rel_residual_goal_(rel_residual_goal) + {} + + int max_iters_; + remove_complex rel_residual_goal_; +}; + + +} // namespace solver +} // namespace gko + + +#endif // GKO_CORE_SOLVER_TFQMR_HPP diff --git a/core/solver/tfqmr_kernels.hpp b/core/solver/tfqmr_kernels.hpp new file mode 100644 index 00000000000..e4806d50c59 --- /dev/null +++ b/core/solver/tfqmr_kernels.hpp @@ -0,0 +1,170 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_CORE_SOLVER_TFQMR_KERNELS_HPP_ +#define GKO_CORE_SOLVER_TFQMR_KERNELS_HPP_ + + +#include "core/base/types.hpp" +#include "core/matrix/dense.hpp" + + +namespace gko { +namespace kernels { +namespace tfqmr { + + +#define GKO_DECLARE_TFQMR_INITIALIZE_KERNEL(_type) \ + void initialize(std::shared_ptr exec, \ + const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ + matrix::Dense<_type> *r0, matrix::Dense<_type> *u_m, \ + matrix::Dense<_type> *u_mp1, matrix::Dense<_type> *pu_m, \ + matrix::Dense<_type> *Au, matrix::Dense<_type> *Ad, \ + matrix::Dense<_type> *w, matrix::Dense<_type> *v, \ + matrix::Dense<_type> *d, matrix::Dense<_type> *taut, \ + matrix::Dense<_type> *rho_old, matrix::Dense<_type> *rho, \ + matrix::Dense<_type> *alpha, matrix::Dense<_type> *beta, \ + matrix::Dense<_type> *tau, matrix::Dense<_type> *sigma, \ + matrix::Dense<_type> *rov, matrix::Dense<_type> *eta, \ + matrix::Dense<_type> *nomw, matrix::Dense<_type> *theta) + + +#define GKO_DECLARE_TFQMR_STEP_1_KERNEL(_type) \ + void step_1(std::shared_ptr exec, \ + matrix::Dense<_type> *alpha, const matrix::Dense<_type> *rov, \ + const matrix::Dense<_type> *rho, \ + const matrix::Dense<_type> *v, \ + const matrix::Dense<_type> *u_m, matrix::Dense<_type> *u_mp1) + + +#define GKO_DECLARE_TFQMR_STEP_2_KERNEL(_type) \ + void step_2(std::shared_ptr exec, \ + const matrix::Dense<_type> *theta, \ + const matrix::Dense<_type> *alpha, \ + const matrix::Dense<_type> *eta, matrix::Dense<_type> *sigma, \ + const matrix::Dense<_type> *Au, \ + const matrix::Dense<_type> *pu_m, matrix::Dense<_type> *w, \ + matrix::Dense<_type> *d, matrix::Dense<_type> *Ad) + + +#define GKO_DECLARE_TFQMR_STEP_3_KERNEL(_type) \ + void step_3(std::shared_ptr exec, \ + matrix::Dense<_type> *theta, const matrix::Dense<_type> *nomw, \ + matrix::Dense<_type> *taut, matrix::Dense<_type> *eta, \ + const matrix::Dense<_type> *alpha) + + +#define GKO_DECLARE_TFQMR_STEP_4_KERNEL(_type) \ + void step_4(std::shared_ptr exec, \ + const matrix::Dense<_type> *eta, \ + const matrix::Dense<_type> *d, const matrix::Dense<_type> *Ad, \ + matrix::Dense<_type> *x, matrix::Dense<_type> *r) + + +#define GKO_DECLARE_TFQMR_STEP_5_KERNEL(_type) \ + void step_5( \ + std::shared_ptr exec, \ + matrix::Dense<_type> *beta, const matrix::Dense<_type> *rho_old, \ + const matrix::Dense<_type> *rho, const matrix::Dense<_type> *w, \ + const matrix::Dense<_type> *u_m, matrix::Dense<_type> *u_mp1) + + +#define GKO_DECLARE_TFQMR_STEP_6_KERNEL(_type) \ + void step_6(std::shared_ptr exec, \ + const matrix::Dense<_type> *beta, \ + const matrix::Dense<_type> *Au_new, \ + const matrix::Dense<_type> *Au, matrix::Dense<_type> *v) + +#define GKO_DECLARE_TFQMR_STEP_7_KERNEL(_type) \ + void step_7(std::shared_ptr exec, \ + const matrix::Dense<_type> *Au_new, \ + const matrix::Dense<_type> *u_mp1, matrix::Dense<_type> *Au, \ + matrix::Dense<_type> *u_m) + +#define DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_TFQMR_INITIALIZE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_1_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_2_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_3_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_4_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_5_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_6_KERNEL(ValueType); \ + template \ + GKO_DECLARE_TFQMR_STEP_7_KERNEL(ValueType) + + +} // namespace tfqmr + + +namespace cpu { +namespace 
tfqmr { + +DECLARE_ALL_AS_TEMPLATES; + +} // namespace tfqmr +} // namespace cpu + + +namespace gpu { +namespace tfqmr { + +DECLARE_ALL_AS_TEMPLATES; + +} // namespace tfqmr +} // namespace gpu + + +namespace reference { +namespace tfqmr { + +DECLARE_ALL_AS_TEMPLATES; + +} // namespace tfqmr +} // namespace reference + + +#undef DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_SOLVER_TFQMR_KERNELS_HPP_ diff --git a/core/test/solver/CMakeLists.txt b/core/test/solver/CMakeLists.txt index 2c94d727025..5ec3e2dbb81 100644 --- a/core/test/solver/CMakeLists.txt +++ b/core/test/solver/CMakeLists.txt @@ -1,3 +1,4 @@ create_test(cg) create_test(fcg) create_test(bicgstab) +create_test(tfqmr) diff --git a/core/test/solver/tfqmr.cpp b/core/test/solver/tfqmr.cpp new file mode 100644 index 00000000000..160969fe5ff --- /dev/null +++ b/core/test/solver/tfqmr.cpp @@ -0,0 +1,190 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include + + +#include +#include + + +namespace { + + +class Tfqmr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + using Solver = gko::solver::Tfqmr<>; + + Tfqmr() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::initialize( + {{2, -1.0, 0.0}, {-1.0, 2, -1.0}, {0.0, -1.0, 2}}, exec)), + tfqmr_factory(gko::solver::TfqmrFactory<>::create(exec, 3, 1e-6)), + solver(tfqmr_factory->generate(mtx)) + {} + + std::shared_ptr exec; + std::shared_ptr mtx; + std::unique_ptr> tfqmr_factory; + std::unique_ptr solver; + + static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + { + ASSERT_EQ(m1->get_num_rows(), m2->get_num_rows()); + ASSERT_EQ(m1->get_num_cols(), m2->get_num_cols()); + for (gko::size_type i = 0; i < m1->get_num_rows(); ++i) { + for (gko::size_type j = 0; j < m2->get_num_cols(); ++j) { + EXPECT_EQ(m1->at(i, j), m2->at(i, j)); + } + } + } +}; + + +TEST_F(Tfqmr, TfqmrFactoryKnowsItsExecutor) +{ + ASSERT_EQ(tfqmr_factory->get_executor(), exec); +} + + +TEST_F(Tfqmr, TfqmrFactoryKnowsItsIterationLimit) +{ + ASSERT_EQ(tfqmr_factory->get_max_iters(), 3); +} + + +TEST_F(Tfqmr, TfqmrFactoryKnowsItsRelResidualGoal) +{ + ASSERT_EQ(tfqmr_factory->get_rel_residual_goal(), 1e-6); +} + + +TEST_F(Tfqmr, TfqmrFactoryCreatesCorrectSolver) +{ + ASSERT_EQ(solver->get_num_rows(), 3); + ASSERT_EQ(solver->get_num_cols(), 3); + ASSERT_EQ(solver->get_num_stored_elements(), 9); + auto tfqmr_solver = static_cast(solver.get()); + ASSERT_EQ(tfqmr_solver->get_max_iters(), 3); + ASSERT_EQ(tfqmr_solver->get_rel_residual_goal(), 1e-6); + ASSERT_NE(tfqmr_solver->get_system_matrix(), nullptr); + ASSERT_EQ(tfqmr_solver->get_system_matrix(), mtx); +} + + +TEST_F(Tfqmr, CanBeCopied) +{ + auto copy = tfqmr_factory->generate(Mtx::create(exec)); + + copy->copy_from(solver.get()); + + ASSERT_EQ(copy->get_num_rows(), 3); + ASSERT_EQ(copy->get_num_cols(), 3); + ASSERT_EQ(copy->get_num_stored_elements(), 9); + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + assert_same_matrices(static_cast(copy_mtx.get()), mtx.get()); +} + + +TEST_F(Tfqmr, CanBeMoved) +{ + auto copy = tfqmr_factory->generate(Mtx::create(exec)); + + copy->copy_from(std::move(solver)); + + ASSERT_EQ(copy->get_num_rows(), 3); + ASSERT_EQ(copy->get_num_cols(), 3); + ASSERT_EQ(copy->get_num_stored_elements(), 9); + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + assert_same_matrices(static_cast(copy_mtx.get()), mtx.get()); +} + + +TEST_F(Tfqmr, CanBeCloned) +{ + auto clone = solver->clone(); + + ASSERT_EQ(clone->get_num_rows(), 3); + ASSERT_EQ(clone->get_num_cols(), 3); + ASSERT_EQ(clone->get_num_stored_elements(), 9); + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + assert_same_matrices(static_cast(clone_mtx.get()), mtx.get()); +} + + +TEST_F(Tfqmr, CanBeCleared) +{ + solver->clear(); + + ASSERT_EQ(solver->get_num_rows(), 0); + ASSERT_EQ(solver->get_num_cols(), 0); + ASSERT_EQ(solver->get_num_stored_elements(), 0); + auto solver_mtx = static_cast(solver.get())->get_system_matrix(); + ASSERT_EQ(solver_mtx, nullptr); +} + + +TEST_F(Tfqmr, CanSetPreconditioner) +{ + std::shared_ptr precond = + gko::initialize({{1.0, 0.0, 0.0, 0.0, 1.0, 0.0}}, exec); + auto tfqmr_solver = static_cast *>(solver.get()); + + tfqmr_solver->set_preconditioner(precond); + + ASSERT_EQ(tfqmr_solver->get_preconditioner(), precond); +} + + +TEST_F(Tfqmr, CanSetPreconditionerGenertor) +{ + tfqmr_factory->set_preconditioner( + 
gko::solver::TfqmrFactory<>::create(exec, 3, 0.0)); + auto solver = tfqmr_factory->generate(mtx); + auto precond = dynamic_cast *>( + static_cast *>(solver.get()) + ->get_preconditioner() + .get()); + + ASSERT_NE(precond, nullptr); + ASSERT_EQ(precond->get_num_rows(), 3); + ASSERT_EQ(precond->get_num_cols(), 3); + ASSERT_EQ(precond->get_system_matrix(), mtx); +} + + +} // namespace diff --git a/cpu/solver/tfqmr_kernels.cpp b/cpu/solver/tfqmr_kernels.cpp new file mode 100644 index 00000000000..82bc75264dd --- /dev/null +++ b/cpu/solver/tfqmr_kernels.cpp @@ -0,0 +1,98 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/solver/tfqmr_kernels.hpp" + + +#include "core/base/exception_helpers.hpp" + + +namespace gko { +namespace kernels { +namespace cpu { +namespace tfqmr { + + +template +void initialize(std::shared_ptr exec, + const matrix::Dense *b, matrix::Dense *r, + matrix::Dense *rr, matrix::Dense *y, + matrix::Dense *s, matrix::Dense *t, + matrix::Dense *z, matrix::Dense *v, + matrix::Dense *p, matrix::Dense *prev_rho, + matrix::Dense *rho, matrix::Dense *alpha, + matrix::Dense *beta, matrix::Dense *gamma, + matrix::Dense *omega) NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_INITIALIZE_KERNEL); + + +template +void step_1(std::shared_ptr exec, + const matrix::Dense *r, matrix::Dense *p, + const matrix::Dense *v, + const matrix::Dense *rho, + const matrix::Dense *prev_rho, + const matrix::Dense *alpha, + const matrix::Dense *omega) NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_1_KERNEL); + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *r, matrix::Dense *s, + const matrix::Dense *v, + const matrix::Dense *rho, + matrix::Dense *alpha, + const matrix::Dense *beta) NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_2_KERNEL); + + +template +void step_3( + std::shared_ptr exec, matrix::Dense *x, + matrix::Dense *r, const matrix::Dense *s, + const matrix::Dense *t, const matrix::Dense *y, + const matrix::Dense *z, const matrix::Dense *alpha, + const matrix::Dense *beta, const matrix::Dense *gamma, + matrix::Dense *omega) NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_3_KERNEL); + + +} // namespace tfqmr +} // namespace cpu +} // namespace kernels +} // namespace gko diff --git a/gpu/CMakeLists.txt b/gpu/CMakeLists.txt index 5436c0c9790..73edc0ad710 100644 --- a/gpu/CMakeLists.txt +++ b/gpu/CMakeLists.txt @@ -8,7 +8,8 @@ set(SOURCES preconditioner/block_jacobi_kernels.cu solver/bicgstab_kernels.cu solver/fcg_kernels.cu - solver/cg_kernels.cu) + solver/cg_kernels.cu + solver/tfqmr_kernels.cu) set(CUDA_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) find_library(CUDA_RUNTIME_LIBS cudart diff --git a/gpu/solver/tfqmr_kernels.cu b/gpu/solver/tfqmr_kernels.cu new file mode 100644 index 00000000000..2e72b6f13af --- /dev/null +++ b/gpu/solver/tfqmr_kernels.cu @@ -0,0 +1,325 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/tfqmr_kernels.hpp" + + +#include "core/base/exception_helpers.hpp" +#include "core/base/math.hpp" +#include "gpu/base/types.hpp" + + +namespace gko { +namespace kernels { +namespace gpu { +namespace tfqmr { + + +constexpr int default_block_size = 512; + + +template +__global__ __launch_bounds__(default_block_size) void initialize_kernel( + size_type num_rows, size_type num_cols, size_type stride, + const ValueType *__restrict__ b, ValueType *__restrict__ r, + ValueType *__restrict__ rr, ValueType *__restrict__ y, + ValueType *__restrict__ s, ValueType *__restrict__ t, + ValueType *__restrict__ z, ValueType *__restrict__ v, + ValueType *__restrict__ p, ValueType *__restrict__ prev_rho, + ValueType *__restrict__ rho, ValueType *__restrict__ alpha, + ValueType *__restrict__ beta, ValueType *__restrict__ gamma, + ValueType *__restrict__ omega) +{ + const auto tidx = + static_cast(blockDim.x) * blockIdx.x + threadIdx.x; + + if (tidx < num_cols) { + prev_rho[tidx] = one(); + rho[tidx] = one(); + alpha[tidx] = one(); + beta[tidx] = one(); + gamma[tidx] = one(); + omega[tidx] = one(); + } + + if (tidx < num_rows * stride) { + r[tidx] = b[tidx]; + rr[tidx] = zero(); + y[tidx] = zero(); + s[tidx] = zero(); + t[tidx] = zero(); + z[tidx] = zero(); + v[tidx] = zero(); + p[tidx] = zero(); + } +} + + +template +void initialize(std::shared_ptr exec, + const matrix::Dense *b, matrix::Dense *r, + matrix::Dense *r0, matrix::Dense *u_m, + matrix::Dense *u_mp1, matrix::Dense *pu_m, + matrix::Dense *Au, matrix::Dense *Ad, + matrix::Dense *w, matrix::Dense *v, + matrix::Dense *d, matrix::Dense *taut, + matrix::Dense *rho_old, + matrix::Dense *rho, matrix::Dense *alpha, + matrix::Dense *beta, matrix::Dense *tau, + matrix::Dense *sigma, matrix::Dense *rov, + matrix::Dense *eta, matrix::Dense *nomw, + matrix::Dense *theta) NOT_IMPLEMENTED + /* + { + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size( + ceildiv(b->get_num_rows() * b->get_stride(), block_size.x), 1, 1); + + initialize_kernel<<>>( + b->get_num_rows(), b->get_num_cols(), b->get_stride(), + as_cuda_type(b->get_const_values()), as_cuda_type(r->get_values()), + as_cuda_type(rr->get_values()), as_cuda_type(y->get_values()), + as_cuda_type(s->get_values()), as_cuda_type(t->get_values()), + as_cuda_type(z->get_values()), as_cuda_type(v->get_values()), + as_cuda_type(p->get_values()), as_cuda_type(prev_rho->get_values()), + as_cuda_type(rho->get_values()), as_cuda_type(alpha->get_values()), + as_cuda_type(beta->get_values()), as_cuda_type(gamma->get_values()), + as_cuda_type(omega->get_values())); + } + */ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_INITIALIZE_KERNEL); + + +template +__global__ __launch_bounds__(default_block_size) void step_1_kernel( + size_type num_rows, size_type num_cols, size_type stride, + const ValueType *__restrict__ r, ValueType *__restrict__ p, + const ValueType *__restrict__ v, const ValueType 
*__restrict__ rho, + const ValueType *__restrict__ prev_rho, const ValueType *__restrict__ alpha, + const ValueType *__restrict__ omega) +{ + const auto tidx = + static_cast(blockDim.x) * blockIdx.x + threadIdx.x; + const auto col = tidx % stride; + if (col >= num_cols || tidx >= num_rows * stride) { + return; + } + auto res = r[tidx]; + if (prev_rho[col] * omega[col] != zero()) { + const auto tmp = (rho[col] / prev_rho[col]) * (alpha[col] / omega[col]); + res += tmp * (p[tidx] - omega[col] * v[tidx]); + } + p[tidx] = res; +} + + +template +void step_1(std::shared_ptr exec, + matrix::Dense *alpha, + const matrix::Dense *rov, + const matrix::Dense *rho, + const matrix::Dense *v, + const matrix::Dense *u_m, + matrix::Dense *u_mp1) NOT_IMPLEMENTED + /* + { + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size( + ceildiv(r->get_num_rows() * r->get_stride(), block_size.x), 1, 1); + + step_1_kernel<<>>( + r->get_num_rows(), r->get_num_cols(), r->get_stride(), + as_cuda_type(r->get_const_values()), as_cuda_type(p->get_values()), + as_cuda_type(v->get_const_values()), + as_cuda_type(rho->get_const_values()), + as_cuda_type(prev_rho->get_const_values()), + as_cuda_type(alpha->get_const_values()), + as_cuda_type(omega->get_const_values())); + } + */ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_1_KERNEL); + + +template +__global__ __launch_bounds__(default_block_size) void step_2_kernel( + size_type num_rows, size_type num_cols, size_type stride, + const ValueType *__restrict__ r, ValueType *__restrict__ s, + const ValueType *__restrict__ v, const ValueType *__restrict__ rho, + ValueType *__restrict__ alpha, const ValueType *__restrict__ beta) +{ + const size_type tidx = + static_cast(blockDim.x) * blockIdx.x + threadIdx.x; + const size_type col = tidx % stride; + if (col >= num_cols || tidx >= num_rows * stride) { + return; + } + auto t_alpha = zero(); + auto t_s = r[tidx]; + if (beta[col] != zero()) { + t_alpha = rho[col] / beta[col]; + t_s -= t_alpha * v[tidx]; + } + alpha[col] = t_alpha; + s[tidx] = t_s; +} + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *theta, + const matrix::Dense *alpha, + const matrix::Dense *eta, + matrix::Dense *sigma, const matrix::Dense *Au, + const matrix::Dense *pu_m, matrix::Dense *w, + matrix::Dense *d, + matrix::Dense *Ad) NOT_IMPLEMENTED + /* + { + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size( + ceildiv(r->get_num_rows() * r->get_stride(), block_size.x), 1, 1); + + step_2_kernel<<>>( + r->get_num_rows(), r->get_num_cols(), r->get_stride(), + as_cuda_type(r->get_const_values()), as_cuda_type(s->get_values()), + as_cuda_type(v->get_const_values()), + as_cuda_type(rho->get_const_values()), + as_cuda_type(alpha->get_values()), + as_cuda_type(beta->get_const_values())); + } + */ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_2_KERNEL); + + +template +__global__ __launch_bounds__(default_block_size) void step_3_kernel( + size_type num_rows, size_type num_cols, size_type stride, + size_type x_stride, ValueType *__restrict__ x, ValueType *__restrict__ r, + const ValueType *__restrict__ s, const ValueType *__restrict__ t, + const ValueType *__restrict__ y, const ValueType *__restrict__ z, + const ValueType *__restrict__ alpha, const ValueType *__restrict__ beta, + const ValueType *__restrict__ gamma, ValueType *__restrict__ omega) +{ + const auto tidx = + static_cast(blockDim.x) * blockIdx.x + threadIdx.x; + const auto row = tidx / stride; + const auto col = tidx % stride; + 
if (col >= num_cols || tidx >= num_rows * stride) { + return; + } + const auto x_pos = row * x_stride + col; + auto t_omega = zero(); + auto t_x = x[x_pos] + alpha[col] * y[tidx]; + auto t_r = s[tidx]; + if (beta[col] != zero()) { + t_omega = gamma[col] / beta[col]; + t_x += t_omega * z[tidx]; + t_r -= t_omega * t[tidx]; + } + omega[col] = t_omega; + x[x_pos] = t_x; + r[tidx] = t_r; +} + +template +void step_3(std::shared_ptr exec, + matrix::Dense *theta, + const matrix::Dense *nomw, + matrix::Dense *taut, matrix::Dense *eta, + const matrix::Dense *alpha) NOT_IMPLEMENTED + /* + { + const dim3 block_size(default_block_size, 1, 1); + const dim3 grid_size( + ceildiv(r->get_num_rows() * r->get_stride(), block_size.x), 1, 1); + + step_3_kernel<<>>( + r->get_num_rows(), r->get_num_cols(), r->get_stride(), + x->get_stride(), as_cuda_type(x->get_values()), + as_cuda_type(r->get_values()), as_cuda_type(s->get_const_values()), + as_cuda_type(t->get_const_values()), + as_cuda_type(y->get_const_values()), + as_cuda_type(z->get_const_values()), + as_cuda_type(alpha->get_const_values()), + as_cuda_type(beta->get_const_values()), + as_cuda_type(gamma->get_const_values()), + as_cuda_type(omega->get_values())); + } + */ + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_3_KERNEL); + + +template +void step_4(std::shared_ptr exec, + const matrix::Dense *eta, + const matrix::Dense *d, + const matrix::Dense *Ad, matrix::Dense *x, + matrix::Dense *r) NOT_IMPLEMENTED + + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_4_KERNEL); + + +template +void step_5(std::shared_ptr exec, + matrix::Dense *beta, + const matrix::Dense *rho_old, + const matrix::Dense *rho, + const matrix::Dense *w, + const matrix::Dense *u_m, + matrix::Dense *u_mp1) NOT_IMPLEMENTED + + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_5_KERNEL); + + +template +void step_6(std::shared_ptr exec, + const matrix::Dense *beta, + const matrix::Dense *Au_new, + const matrix::Dense *Au, + matrix::Dense *v) NOT_IMPLEMENTED + + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_6_KERNEL); + +template +void step_7(std::shared_ptr exec, + const matrix::Dense *Au_new, + const matrix::Dense *u_mp1, matrix::Dense *Au, + matrix::Dense *u_m) NOT_IMPLEMENTED + + GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_7_KERNEL); + + +} // namespace tfqmr +} // namespace gpu +} // namespace kernels +} // namespace gko diff --git a/gpu/test/solver/tfqmr_kernels.cpp b/gpu/test/solver/tfqmr_kernels.cpp new file mode 100644 index 00000000000..8d6cd653e54 --- /dev/null +++ b/gpu/test/solver/tfqmr_kernels.cpp @@ -0,0 +1,320 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include +#include +#include +#include +#include + + +namespace { + + +class Tfqmr : public ::testing::Test { +protected: + using Mtx = gko::matrix::Dense<>; + Tfqmr() : rand_engine(30) {} + + void SetUp() + { + ASSERT_GT(gko::GpuExecutor::get_num_devices(), 0); + ref = gko::ReferenceExecutor::create(); + gpu = gko::GpuExecutor::create(0, ref); + + mtx = gen_mtx(123, 123); + make_diag_dominant(mtx.get()); + d_mtx = Mtx::create(gpu); + d_mtx->copy_from(mtx.get()); + gpu_tfqmr_factory = + gko::solver::TfqmrFactory<>::create(gpu, 246, 1e-15); + ref_tfqmr_factory = + gko::solver::TfqmrFactory<>::create(ref, 246, 1e-15); + } + + void TearDown() + { + if (gpu != nullptr) { + ASSERT_NO_THROW(gpu->synchronize()); + } + } + + std::unique_ptr gen_mtx(int num_rows, int num_cols) + { + return gko::test::generate_random_matrix( + ref, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(0.0, 1.0), rand_engine); + } + + void initialize_data() + { + int m = 48; + int n = 17; + x = gen_mtx(m, n); + b = gen_mtx(m, n); + r = gen_mtx(m, n); + z = gen_mtx(m, n); + p = gen_mtx(m, n); + rr = gen_mtx(m, n); + s = gen_mtx(m, n); + t = gen_mtx(m, n); + y = gen_mtx(m, n); + v = gen_mtx(m, n); + prev_rho = gen_mtx(1, n); + rho = gen_mtx(1, n); + alpha = gen_mtx(1, n); + beta = gen_mtx(1, n); + gamma = gen_mtx(1, n); + omega = gen_mtx(1, n); + + d_x = Mtx::create(gpu); + d_b = Mtx::create(gpu); + d_r = Mtx::create(gpu); + d_z = Mtx::create(gpu); + d_p = Mtx::create(gpu); + d_t = Mtx::create(gpu); + d_s = Mtx::create(gpu); + d_y = Mtx::create(gpu); + d_v = Mtx::create(gpu); + d_rr = Mtx::create(gpu); + d_prev_rho = Mtx::create(gpu); + d_rho = Mtx::create(gpu); + d_alpha = Mtx::create(gpu); + d_beta = Mtx::create(gpu); + d_gamma = Mtx::create(gpu); + d_omega = Mtx::create(gpu); + d_x->copy_from(x.get()); + d_b->copy_from(b.get()); + d_r->copy_from(r.get()); + d_z->copy_from(z.get()); + d_p->copy_from(p.get()); + d_v->copy_from(v.get()); + d_y->copy_from(y.get()); + d_t->copy_from(t.get()); + d_s->copy_from(s.get()); + d_rr->copy_from(rr.get()); + d_prev_rho->copy_from(prev_rho.get()); + d_rho->copy_from(rho.get()); + d_alpha->copy_from(alpha.get()); + d_beta->copy_from(beta.get()); + d_gamma->copy_from(gamma.get()); + d_omega->copy_from(omega.get()); + } + + void make_diag_dominant(Mtx *mtx) + { + using std::abs; + for (int i = 0; i < mtx->get_num_rows(); ++i) { + auto sum = gko::zero(); + for (int j = 0; j < mtx->get_num_cols(); ++j) { + sum += abs(mtx->at(i, j)); + } + mtx->at(i, i) = sum; + } + } + + std::shared_ptr ref; + std::shared_ptr gpu; + + std::ranlux48 rand_engine; + + std::shared_ptr mtx; + 
std::shared_ptr d_mtx; + std::unique_ptr> gpu_tfqmr_factory; + std::unique_ptr> ref_tfqmr_factory; + + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr r; + std::unique_ptr z; + std::unique_ptr p; + std::unique_ptr rr; + std::unique_ptr s; + std::unique_ptr t; + std::unique_ptr y; + std::unique_ptr v; + std::unique_ptr prev_rho; + std::unique_ptr rho; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr gamma; + std::unique_ptr omega; + + std::unique_ptr d_x; + std::unique_ptr d_b; + std::unique_ptr d_r; + std::unique_ptr d_z; + std::unique_ptr d_p; + std::unique_ptr d_t; + std::unique_ptr d_s; + std::unique_ptr d_y; + std::unique_ptr d_v; + std::unique_ptr d_rr; + std::unique_ptr d_prev_rho; + std::unique_ptr d_rho; + std::unique_ptr d_alpha; + std::unique_ptr d_beta; + std::unique_ptr d_gamma; + std::unique_ptr d_omega; +}; + + +TEST_F(Tfqmr, GpuTfqmrInitializeIsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::tfqmr::initialize( + ref, b.get(), r.get(), rr.get(), y.get(), s.get(), t.get(), z.get(), + v.get(), p.get(), prev_rho.get(), rho.get(), alpha.get(), beta.get(), + gamma.get(), omega.get()); + gko::kernels::gpu::tfqmr::initialize( + gpu, d_b.get(), d_r.get(), d_rr.get(), d_y.get(), d_s.get(), d_t.get(), + d_z.get(), d_v.get(), d_p.get(), d_prev_rho.get(), d_rho.get(), + d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get()); + + EXPECT_MTX_NEAR(d_r, r, 1e-14); + EXPECT_MTX_NEAR(d_z, z, 1e-14); + EXPECT_MTX_NEAR(d_p, p, 1e-14); + EXPECT_MTX_NEAR(d_y, y, 1e-14); + EXPECT_MTX_NEAR(d_t, t, 1e-14); + EXPECT_MTX_NEAR(d_s, s, 1e-14); + EXPECT_MTX_NEAR(d_rr, rr, 1e-14); + EXPECT_MTX_NEAR(d_v, v, 1e-14); + EXPECT_MTX_NEAR(d_prev_rho, prev_rho, 1e-14); + EXPECT_MTX_NEAR(d_rho, rho, 1e-14); + EXPECT_MTX_NEAR(d_alpha, alpha, 1e-14); + EXPECT_MTX_NEAR(d_beta, beta, 1e-14); + EXPECT_MTX_NEAR(d_gamma, gamma, 1e-14); + EXPECT_MTX_NEAR(d_omega, omega, 1e-14); +} + + +TEST_F(Tfqmr, GpuTfqmrStep1IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::tfqmr::step_1(ref, r.get(), p.get(), v.get(), + rho.get(), prev_rho.get(), + alpha.get(), omega.get()); + gko::kernels::gpu::tfqmr::step_1(gpu, d_r.get(), d_p.get(), d_v.get(), + d_rho.get(), d_prev_rho.get(), + d_alpha.get(), d_omega.get()); + + ASSERT_MTX_NEAR(d_p, p, 1e-14); +} + + +TEST_F(Tfqmr, GpuTfqmrStep2IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::tfqmr::step_2(ref, r.get(), s.get(), v.get(), + rho.get(), alpha.get(), beta.get()); + gko::kernels::gpu::tfqmr::step_2(gpu, d_r.get(), d_s.get(), d_v.get(), + d_rho.get(), d_alpha.get(), d_beta.get()); + + ASSERT_MTX_NEAR(d_alpha, alpha, 1e-14); + ASSERT_MTX_NEAR(d_s, s, 1e-14); +} + + +TEST_F(Tfqmr, GpuTfqmrStep3IsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::tfqmr::step_3( + ref, x.get(), r.get(), s.get(), t.get(), y.get(), z.get(), alpha.get(), + beta.get(), gamma.get(), omega.get()); + gko::kernels::gpu::tfqmr::step_3( + gpu, d_x.get(), d_r.get(), d_s.get(), d_t.get(), d_y.get(), d_z.get(), + d_alpha.get(), d_beta.get(), d_gamma.get(), d_omega.get()); + + ASSERT_MTX_NEAR(d_omega, omega, 1e-14); + ASSERT_MTX_NEAR(d_x, x, 1e-14); + ASSERT_MTX_NEAR(d_r, r, 1e-14); +} + + +TEST_F(Tfqmr, GpuTfqmrApplyOneRHSIsEquivalentToRef) +{ + int m = 123; + int n = 1; + auto ref_solver = ref_tfqmr_factory->generate(mtx); + auto gpu_solver = gpu_tfqmr_factory->generate(d_mtx); + auto b = gen_mtx(m, n); + auto x = gen_mtx(m, n); + auto d_b = Mtx::create(gpu); + auto d_x = Mtx::create(gpu); + 
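+    // mirror the host-side b and x on the GPU so that the reference and GPU
+    // solvers start from identical data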
d_b->copy_from(b.get()); + d_x->copy_from(x.get()); + + ref_solver->apply(b.get(), x.get()); + gpu_solver->apply(d_b.get(), d_x.get()); + + ASSERT_MTX_NEAR(d_b, b, 1e-13); + ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +TEST_F(Tfqmr, GpuTfqmrApplyMultipleRHSIsEquivalentToRef) +{ + int m = 123; + int n = 16; + auto gpu_solver = gpu_tfqmr_factory->generate(d_mtx); + auto ref_solver = ref_tfqmr_factory->generate(mtx); + auto b = gen_mtx(m, n); + auto x = gen_mtx(m, n); + auto d_b = Mtx::create(gpu); + auto d_x = Mtx::create(gpu); + d_b->copy_from(b.get()); + d_x->copy_from(x.get()); + + ref_solver->apply(b.get(), x.get()); + gpu_solver->apply(d_b.get(), d_x.get()); + + ASSERT_MTX_NEAR(d_b, b, 1e-13); + ASSERT_MTX_NEAR(d_x, x, 1e-13); +} + + +} // namespace diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index fc138e7c01c..3795ab9c9ce 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -6,7 +6,8 @@ set(SOURCES preconditioner/block_jacobi_kernels.cpp solver/bicgstab_kernels.cpp solver/fcg_kernels.cpp - solver/cg_kernels.cpp) + solver/cg_kernels.cpp + solver/tfqmr_kernels.cpp) add_library(ginkgo_reference SHARED $ diff --git a/reference/solver/tfqmr_kernels.cpp b/reference/solver/tfqmr_kernels.cpp new file mode 100644 index 00000000000..e6c98222126 --- /dev/null +++ b/reference/solver/tfqmr_kernels.cpp @@ -0,0 +1,248 @@ +/************************************************************* +Copyright 2017-2018 + +Karlsruhe Institute of Technology +Universitat Jaume I +University of Tennessee + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/solver/tfqmr_kernels.hpp" + +#include "core/base/exception_helpers.hpp" +#include "core/base/math.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +namespace tfqmr { + + +template +void initialize(std::shared_ptr exec, + const matrix::Dense *b, matrix::Dense *r, + matrix::Dense *r0, matrix::Dense *u_m, + matrix::Dense *u_mp1, matrix::Dense *pu_m, + matrix::Dense *Au, matrix::Dense *Ad, + matrix::Dense *w, matrix::Dense *v, + matrix::Dense *d, matrix::Dense *taut, + matrix::Dense *rho_old, + matrix::Dense *rho, matrix::Dense *alpha, + matrix::Dense *beta, matrix::Dense *tau, + matrix::Dense *sigma, matrix::Dense *rov, + matrix::Dense *eta, matrix::Dense *nomw, + matrix::Dense *theta) +{ + for (size_type j = 0; j < b->get_num_cols(); ++j) { + taut->at(j) = one(); + rho_old->at(j) = zero(); + rho->at(j) = zero(); + alpha->at(j) = zero(); + beta->at(j) = zero(); + tau->at(j) = zero(); + sigma->at(j) = zero(); + rov->at(j) = zero(); + eta->at(j) = one(); + nomw->at(j) = one(); + theta->at(j) = zero(); + } + for (size_type i = 0; i < b->get_num_rows(); ++i) { + for (size_type j = 0; j < b->get_num_cols(); ++j) { + r->at(i, j) = b->at(i, j); + r0->at(i, j) = b->at(i, j); + u_m->at(i, j) = b->at(i, j); + w->at(i, j) = b->at(i, j); + v->at(i, j) = b->at(i, j); + d->at(i, j) = zero(); + u_mp1->at(i, j) = zero(); + pu_m->at(i, j) = zero(); + Au->at(i, j) = zero(); + Ad->at(i, j) = zero(); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_INITIALIZE_KERNEL); + + +template +void step_1(std::shared_ptr exec, + matrix::Dense *alpha, + const matrix::Dense *rov, + const matrix::Dense *rho, + const matrix::Dense *v, + const matrix::Dense *u_m, + matrix::Dense *u_mp1) +{ + for (size_type j = 0; j < v->get_num_cols(); ++j) { + if (rov->at(j) != zero()) { + alpha->at(j) = rho->at(j) / rov->at(j); + } else { + alpha->at(j) = zero(); + } + } + for (size_type i = 0; i < v->get_num_rows(); ++i) { + for (size_type j = 0; j < v->get_num_cols(); ++j) { + u_mp1->at(i, j) = u_m->at(i, j) - alpha->at(j) * v->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_1_KERNEL); + + +template +void step_2(std::shared_ptr exec, + const matrix::Dense *theta, + const matrix::Dense *alpha, + const matrix::Dense *eta, + matrix::Dense *sigma, const matrix::Dense *Au, + const matrix::Dense *pu_m, matrix::Dense *w, + matrix::Dense *d, matrix::Dense *Ad) +{ + for (size_type j = 0; j < d->get_num_cols(); ++j) { + if (alpha->at(j) != zero()) { + sigma->at(j) = theta->at(j) / alpha->at(j) * eta->at(j); + } else { + sigma->at(j) = zero(); + } + } + for (size_type i = 0; i < d->get_num_rows(); ++i) { + for (size_type j = 0; j < d->get_num_cols(); ++j) { + w->at(i, j) = w->at(i, j) - alpha->at(j) * Au->at(i, j); + d->at(i, j) = pu_m->at(i, j) + sigma->at(j) * d->at(i, j); + Ad->at(i, j) = Au->at(i, j) + sigma->at(j) * Ad->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_2_KERNEL); + + +template +void step_3(std::shared_ptr exec, + matrix::Dense *theta, + const matrix::Dense *nomw, + matrix::Dense *taut, matrix::Dense *eta, + const matrix::Dense *alpha) +{ + for (size_type j = 0; j < alpha->get_num_rows(); ++j) { + if (taut->at(j) != zero()) { + theta->at(j) = nomw->at(j) / taut->at(j); + } else { + theta->at(j) = one(); + } + auto tmp = one() / sqrt(one() + theta->at(j)); + taut->at(j) = taut->at(j) * sqrt(theta->at(j)) * tmp; + eta->at(j) = alpha->at(j) * tmp 
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_3_KERNEL);
+
+
+template <typename ValueType>
+void step_4(std::shared_ptr<const ReferenceExecutor> exec,
+            const matrix::Dense<ValueType> *eta,
+            const matrix::Dense<ValueType> *d,
+            const matrix::Dense<ValueType> *Ad, matrix::Dense<ValueType> *x,
+            matrix::Dense<ValueType> *r)
+{
+    // x = x + eta * d, r = r - eta * Ad
+    for (size_type i = 0; i < d->get_num_rows(); ++i) {
+        for (size_type j = 0; j < d->get_num_cols(); ++j) {
+            x->at(i, j) = x->at(i, j) + eta->at(j) * d->at(i, j);
+            r->at(i, j) = r->at(i, j) - eta->at(j) * Ad->at(i, j);
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_4_KERNEL);
+
+
+template <typename ValueType>
+void step_5(std::shared_ptr<const ReferenceExecutor> exec,
+            matrix::Dense<ValueType> *beta,
+            const matrix::Dense<ValueType> *rho_old,
+            const matrix::Dense<ValueType> *rho,
+            const matrix::Dense<ValueType> *w,
+            const matrix::Dense<ValueType> *u_m,
+            matrix::Dense<ValueType> *u_mp1)
+{
+    // beta = rho / rho_old (guarded against division by zero)
+    for (size_type j = 0; j < w->get_num_cols(); ++j) {
+        if (rho_old->at(j) != zero<ValueType>()) {
+            beta->at(j) = rho->at(j) / rho_old->at(j);
+        } else {
+            beta->at(j) = zero<ValueType>();
+        }
+    }
+    // u_mp1 = w + beta * u_m
+    for (size_type i = 0; i < w->get_num_rows(); ++i) {
+        for (size_type j = 0; j < w->get_num_cols(); ++j) {
+            u_mp1->at(i, j) = w->at(i, j) + beta->at(j) * u_m->at(i, j);
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_5_KERNEL);
+
+
+template <typename ValueType>
+void step_6(std::shared_ptr<const ReferenceExecutor> exec,
+            const matrix::Dense<ValueType> *beta,
+            const matrix::Dense<ValueType> *Au_new,
+            const matrix::Dense<ValueType> *Au, matrix::Dense<ValueType> *v)
+{
+    // v = Au_new + beta * (Au + beta * v)
+    for (size_type i = 0; i < v->get_num_rows(); ++i) {
+        for (size_type j = 0; j < v->get_num_cols(); ++j) {
+            v->at(i, j) =
+                Au_new->at(i, j) +
+                beta->at(j) * (Au->at(i, j) + beta->at(j) * v->at(i, j));
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_6_KERNEL);
+
+
+template <typename ValueType>
+void step_7(std::shared_ptr<const ReferenceExecutor> exec,
+            const matrix::Dense<ValueType> *Au_new,
+            const matrix::Dense<ValueType> *u_mp1,
+            matrix::Dense<ValueType> *Au, matrix::Dense<ValueType> *u_m)
+{
+    // Au = Au_new, u_m = u_mp1
+    for (size_type i = 0; i < u_m->get_num_rows(); ++i) {
+        for (size_type j = 0; j < u_m->get_num_cols(); ++j) {
+            Au->at(i, j) = Au_new->at(i, j);
+            u_m->at(i, j) = u_mp1->at(i, j);
+        }
+    }
+}
+
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_TFQMR_STEP_7_KERNEL);
+
+
+} // namespace tfqmr
+} // namespace reference
+} // namespace kernels
+} // namespace gko
diff --git a/reference/test/solver/CMakeLists.txt b/reference/test/solver/CMakeLists.txt
index 7008b0afc9c..ce655857cba 100644
--- a/reference/test/solver/CMakeLists.txt
+++ b/reference/test/solver/CMakeLists.txt
@@ -1,3 +1,4 @@
 create_test(bicgstab_kernels)
 create_test(cg_kernels)
 create_test(fcg_kernels)
+create_test(tfqmr_kernels)
diff --git a/reference/test/solver/tfqmr_kernels.cpp b/reference/test/solver/tfqmr_kernels.cpp
new file mode 100644
index 00000000000..ff27d554b42
--- /dev/null
+++ b/reference/test/solver/tfqmr_kernels.cpp
@@ -0,0 +1,121 @@
+/*************************************************************
+Copyright 2017-2018
+
+Karlsruhe Institute of Technology
+Universitat Jaume I
+University of Tennessee
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************/
+
+#include <core/solver/tfqmr.hpp>
+
+
+#include <gtest/gtest.h>
+
+
+#include <core/base/exception.hpp>
+#include <core/base/executor.hpp>
+#include <core/matrix/dense.hpp>
+#include <core/test/utils/assertions.hpp>
+
+
+namespace {
+
+
+class Tfqmr : public ::testing::Test {
+protected:
+    using Mtx = gko::matrix::Dense<>;
+    Tfqmr()
+        : exec(gko::ReferenceExecutor::create()),
+          mtx(gko::initialize<Mtx>(
+              {{1.0, -3.0, 0.0}, {-4.0, 1.0, -3.0}, {2.0, -1.0, 2.0}}, exec)),
+          tfqmr_factory(gko::solver::TfqmrFactory<>::create(exec, 8, 1e-15))
+    {}
+
+    std::shared_ptr<const gko::Executor> exec;
+    std::shared_ptr<Mtx> mtx;
+    std::unique_ptr<gko::solver::TfqmrFactory<>> tfqmr_factory;
+};
+
+
+TEST_F(Tfqmr, SolvesDenseSystem)
+{
+    auto solver = tfqmr_factory->generate(mtx);
+    auto b = gko::initialize<Mtx>({-1.0, 3.0, 1.0}, exec);
+    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, exec);
+
+    solver->apply(b.get(), x.get());
+
+    ASSERT_MTX_NEAR(x, l({-4.0, -1.0, 4.0}), 1e-8);
+}
+
+
+TEST_F(Tfqmr, SolvesMultipleDenseSystems)
+{
+    auto solver = tfqmr_factory->generate(mtx);
+    auto b =
+        gko::initialize<Mtx>({{-1.0, -5.0}, {3.0, 1.0}, {1.0, -2.0}}, exec);
+    auto x =
+        gko::initialize<Mtx>({{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}, exec);
+
+    solver->apply(b.get(), x.get());
+
+    ASSERT_MTX_NEAR(x, l({{-4.0, 1.0}, {-1.0, 2.0}, {4.0, -1.0}}), 1e-8);
+}
+
+
+TEST_F(Tfqmr, SolvesDenseSystemUsingAdvancedApply)
+{
+    auto solver = tfqmr_factory->generate(mtx);
+    auto alpha = gko::initialize<Mtx>({2.0}, exec);
+    auto beta = gko::initialize<Mtx>({-1.0}, exec);
+    auto b = gko::initialize<Mtx>({-1.0, 3.0, 1.0}, exec);
+    auto x = gko::initialize<Mtx>({0.5, 1.0, 2.0}, exec);
+
+    solver->apply(alpha.get(), b.get(), beta.get(), x.get());
+
+    ASSERT_MTX_NEAR(x, l({-8.5, -3.0, 6.0}), 1e-8);
+}
+
+
+TEST_F(Tfqmr, SolvesMultipleDenseSystemsUsingAdvancedApply)
+{
+    auto solver = tfqmr_factory->generate(mtx);
+    auto alpha = gko::initialize<Mtx>({2.0}, exec);
+    auto beta = gko::initialize<Mtx>({-1.0}, exec);
+    auto b =
+        gko::initialize<Mtx>({{-1.0, -5.0}, {3.0, 1.0}, {1.0, -2.0}}, exec);
+    auto x =
+        gko::initialize<Mtx>({{0.5, 1.0}, {1.0, 2.0}, {2.0, 3.0}}, exec);
+
+    solver->apply(alpha.get(), b.get(), beta.get(), x.get());
+
+    ASSERT_MTX_NEAR(x, l({{-8.5, 1.0}, {-3.0, 2.0}, {6.0, -5.0}}), 1e-8);
+}
+
+
+} // namespace
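
Not part of the patch, but for orientation: a minimal sketch of how the new solver is driven, assuming only the factory interface exercised by the tests above (executor, maximum iteration count, relative residual goal). The main() wrapper, include set, and the comment on the expected result are illustrative, not part of the library.

#include <core/base/executor.hpp>
#include <core/matrix/dense.hpp>
#include <core/solver/tfqmr.hpp>

#include <memory>

int main()
{
    using Mtx = gko::matrix::Dense<>;
    auto exec = gko::ReferenceExecutor::create();
    // same 3x3 system as in the tests above
    std::shared_ptr<Mtx> A = gko::initialize<Mtx>(
        {{1.0, -3.0, 0.0}, {-4.0, 1.0, -3.0}, {2.0, -1.0, 2.0}}, exec);
    auto b = gko::initialize<Mtx>({-1.0, 3.0, 1.0}, exec);
    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, exec);

    // at most 8 iterations, relative residual goal of 1e-15
    auto tfqmr_factory = gko::solver::TfqmrFactory<>::create(exec, 8, 1e-15);
    auto solver = tfqmr_factory->generate(A);
    solver->apply(b.get(), x.get());  // x should now be close to (-4, -1, 4)
}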