diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 730d539df46..a5727cdb151 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -169,7 +169,7 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const * 1x norm2 n * 1x scal 2n * Restart: (1+14/d)n (every dth iteration) - * 1x gemm (d+1)n + * 1x gemv (d+1)n * 1x Preconditioner 2n * values + storage * 1x axpy 3n * 1x copy 2n diff --git a/core/solver/idr.cpp b/core/solver/idr.cpp index 555925f1e45..0cf0a5c3bde 100644 --- a/core/solver/idr.cpp +++ b/core/solver/idr.cpp @@ -175,19 +175,19 @@ void Idr::iterate(const LinOp *b, LinOp *x) const * (11/2s+10+5/(s+1))n * values + matrix/preconditioner storage * For (s+1) iterations: * (11/2s^2+31/2s+15)n * values + (s+1) * matrix/preconditioner storage - * dx SpMV: 2(s+1)n * values + (s+1) * storage - * dx Preconditioner: 2(s+1)n * values + (s+1) * storage - * 1x multidot (gemm) (s+1)n - * dx step 1 (fused axpys) s(s/2+5/2)n = approx 1 + sum k=[0,s) of (s-k+1)n - * dx step 2 (fused axpys) s(s/2+5/2)n = approx 1 + sum k=[0,s) of (s-k+1)n - * dx step 3: s(9/2s+11/2)n = sum k=[0,s) of (8k+2+s-k+1+6)n + * (s+1)x SpMV: 2(s+1)n * values + (s+1) * storage + * (s+1)x Preconditioner: 2(s+1)n * values + (s+1) * storage + * 1x multidot (gemv) (s+1)n + * sx step 1 (fused axpys) s(s/2+5/2)n = sum k=[0,s) of (s-k+2)n + * sx step 2 (fused axpys) s(s/2+5/2)n = sum k=[0,s) of (s-k+2)n + * sx step 3: s(9/2s+11/2)n = sum k=[0,s) of (8k+2+s-k+1+6)n * 1x orthogonalize g+u (8k+2)n in iteration k (0-based) - * 1x multidot (gemm) (s-k+1)n in iteration k (0-based) + * 1x multidot (gemv) (s-k+1)n in iteration k (0-based) * 2x axpy 6n * 1x dot 2n * 2x norm2 2n * 1x scale 2n - * 2x axpy 4n + * 2x axpy 6n */ while (true) { ++total_iter;