Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise vector operations in SolverIDR to improve performance #14256

Merged
merged 3 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/news/changes/minor/20220912Kronbichler
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Improved: The vector operations in SolverIDR::solve() have been revised for
performance. The new implementation should show a speedup in case the
matrix-vector product and preconditioner are not much more expensive than the
vector operations. Furthermore, the usage of temporary vectors has been
reduced by two.
<br>
(Martin Kronbichler, 2022/09/12)
110 changes: 60 additions & 50 deletions include/deal.II/lac/solver_idr.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ namespace internal
if (data[i] == nullptr)
{
data[i] = std::move(typename VectorMemory<VectorType>::Pointer(mem));
data[i]->reinit(temp);
data[i]->reinit(temp, true);
}
return *data[i];
}
Expand Down Expand Up @@ -324,28 +324,25 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
// Define temporary vectors which do not do not depend on s
typename VectorMemory<VectorType>::Pointer r_pointer(this->memory);
typename VectorMemory<VectorType>::Pointer v_pointer(this->memory);
typename VectorMemory<VectorType>::Pointer vhat_pointer(this->memory);
typename VectorMemory<VectorType>::Pointer uhat_pointer(this->memory);
typename VectorMemory<VectorType>::Pointer ghat_pointer(this->memory);

VectorType &r = *r_pointer;
VectorType &v = *v_pointer;
VectorType &vhat = *vhat_pointer;
VectorType &uhat = *uhat_pointer;
VectorType &ghat = *ghat_pointer;

r.reinit(x, true);
v.reinit(x, true);
vhat.reinit(x, true);
uhat.reinit(x, true);
ghat.reinit(x, true);

// Initial residual
A.vmult(r, x);
r.sadd(-1.0, 1.0, b);

using value_type = typename VectorType::value_type;
using real_type = typename numbers::NumberTraits<value_type>::real_type;

// Check for convergent initial guess
double res = r.l2_norm();
real_type res = r.l2_norm();
iteration_state = this->iteration_status(step, res, x);
if (iteration_state == SolverControl::success)
return;
Expand All @@ -354,18 +351,17 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
internal::SolverIDRImplementation::TmpVectors<VectorType> G(s, this->memory);
internal::SolverIDRImplementation::TmpVectors<VectorType> U(s, this->memory);
internal::SolverIDRImplementation::TmpVectors<VectorType> Q(s, this->memory);
FullMatrix<double> M(s, s);
FullMatrix<value_type> M(s, s);

// Random number generator for vector entries of
// Q (normal distribution, mean=0 sigma=1)
std::mt19937 rng;
std::normal_distribution<> normal_distribution(0.0, 1.0);
for (unsigned int i = 0; i < s; ++i)
{
VectorType &tmp_g = G(i, x);
VectorType &tmp_u = U(i, x);
tmp_g = 0;
tmp_u = 0;
// Initialize vectors
G(i, x);
U(i, x);

// Compute random set of s orthonormalized vectors Q
// Note: the first vector is chosen to be the initial
Expand Down Expand Up @@ -400,7 +396,7 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
M(i, i) = 1.;
}

double omega = 1.;
value_type omega = 1.;

bool early_exit = false;

Expand All @@ -410,18 +406,18 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
++step;

// Compute phi
Vector<double> phi(s);
Vector<value_type> phi(s);
for (unsigned int i = 0; i < s; ++i)
phi(i) = Q[i] * r;

// Inner iteration over s
for (unsigned int k = 0; k < s; ++k)
{
// Solve M(k:s)*gamma = phi(k:s)
Vector<double> gamma(s - k);
Vector<value_type> gamma(s - k);
{
Vector<double> phik(s - k);
FullMatrix<double> Mk(s - k, s - k);
Vector<value_type> phik(s - k);
FullMatrix<value_type> Mk(s - k, s - k);
std::vector<unsigned int> indices;
unsigned int j = 0;
for (unsigned int i = k; i < s; ++i, ++j)
Expand All @@ -431,48 +427,63 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
}
Mk.extract_submatrix_from(M, indices, indices);

FullMatrix<double> Mk_inv(s - k, s - k);
FullMatrix<value_type> Mk_inv(s - k, s - k);
Mk_inv.invert(Mk);
Mk_inv.vmult(gamma, phik);
}

{
v = r;
v = r;

unsigned int j = 0;
for (unsigned int i = k; i < s; ++i, ++j)
v.add(-1.0 * gamma(j), G[i]);
preconditioner.vmult(vhat, v);
if (step > 1)
{
for (unsigned int i = k, j = 0; i < s; ++i, ++j)
v.add(-gamma(j), G[i]);
}

preconditioner.vmult(uhat, v);

uhat = vhat;
if (step > 1)
{
uhat.sadd(omega, gamma(0), U[k]);
for (unsigned int i = k + 1, j = 1; i < s; ++i, ++j)
uhat.add(gamma(j), U[i]);
}
else
uhat *= omega;
j = 0;
for (unsigned int i = k; i < s; ++i, ++j)
uhat.add(gamma(j), U[i]);
A.vmult(ghat, uhat);
}

A.vmult(G[k], uhat);

// Update G and U
// Orthogonalize ghat to Q0,..,Q_{k-1}
// and update uhat
for (unsigned int i = 0; i < k; ++i)
// Orthogonalize G[k] to Q0,..,Q_{k-1} and update uhat
if (k > 0)
{
double alpha = (Q[i] * ghat) / M(i, i);
ghat.add(-alpha, G[i]);
uhat.add(-alpha, U[i]);
value_type alpha = Q[0] * G[k] / M(0, 0);
for (unsigned int i = 1; i < k; ++i)
{
const value_type alpha_old = alpha;
alpha = G[k].add_and_dot(-alpha, G[i - 1], Q[i]) / M(i, i);

// update uhat every other iteration to reduce vector access
if (i % 2 == 1)
uhat.add(-alpha_old, U[i - 1], -alpha, U[i]);
}
M(k, k) = G[k].add_and_dot(-alpha, G[k - 1], Q[k]);
if (k % 2 == 1)
uhat.add(-alpha, U[k - 1]);
}
G[k] = ghat;
U[k] = uhat;
else
M(k, k) = G[k] * Q[k];

U[k].swap(uhat);

// Update kth column of M
for (unsigned int i = k; i < s; ++i)
for (unsigned int i = k + 1; i < s; ++i)
M(i, k) = Q[i] * G[k];

// Orthogonalize r to Q0,...,Qk,
// update x
// Orthogonalize r to Q0,...,Qk, update x
{
double beta = phi(k) / M(k, k);
r.add(-1.0 * beta, G[k]);
const value_type beta = phi(k) / M(k, k);
r.add(-beta, G[k]);
x.add(beta, U[k]);

print_vectors(step, x, r, U[k]);
Expand Down Expand Up @@ -502,18 +513,17 @@ SolverIDR<VectorType>::solve(const MatrixType & A,
break;

// Update r and x
preconditioner.vmult(vhat, r);
A.vmult(v, vhat);
preconditioner.vmult(uhat, r);
A.vmult(v, uhat);

omega = (v * r) / (v * v);

r.add(-1.0 * omega, v);
x.add(omega, vhat);
res = std::sqrt(r.add_and_dot(-1.0 * omega, v, r));
x.add(omega, uhat);

print_vectors(step, x, r, vhat);
print_vectors(step, x, r, uhat);

// Check for convergence
res = r.l2_norm();
iteration_state = this->iteration_status(step, res, x);
if (iteration_state != SolverControl::iterate)
break;
Expand Down