Skip to content

Commit

Permalink
Merge pull request #14115 from kronbichler/avoid_some_expensive_loops
Browse files Browse the repository at this point in the history
MatrixFree DoFInfo: Break out from some expensive loops
  • Loading branch information
bangerth committed Jul 20, 2022
2 parents 6a07408 + 90c9ee6 commit 10aeb35
Showing 1 changed file with 44 additions and 18 deletions.
62 changes: 44 additions & 18 deletions source/matrix_free/dof_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,9 @@ namespace internal
const unsigned int *dof_indices =
this->dof_indices.data() +
row_starts[i * vectorization_length * n_components].first;
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int k = 0;
k < ndofs && indices_are_interleaved_and_contiguous;
++k)
for (unsigned int j = 0; j < n_comp; ++j)
if (dof_indices[j * ndofs + k] !=
dof_indices[0] + k * n_comp + j)
Expand Down Expand Up @@ -663,7 +665,9 @@ namespace internal
for (unsigned int j = 0; j < n_comp; ++j)
offsets[j] =
dof_indices[j * ndofs + 1] - dof_indices[j * ndofs];
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int k = 0;
k < ndofs && indices_are_interleaved_and_mixed != 0;
++k)
for (unsigned int j = 0; j < n_comp; ++j)
// the first if case is to avoid negative offsets
// (invalid)
Expand Down Expand Up @@ -712,19 +716,36 @@ namespace internal
index_storage_variants[dof_access_cell][i] =
IndexStorageVariants::full;

// do not use interleaved storage if two vectorized
// components point to the same field (scatter not
// possible)
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int l = 0; l < n_comp; ++l)
for (unsigned int j = l + 1; j < n_comp; ++j)
if (dof_indices[j * ndofs + k] ==
dof_indices[l * ndofs + k])
{
index_storage_variants[dof_access_cell][i] =
IndexStorageVariants::full;
break;
}
// do not use interleaved storage if two entries within
// vectorized array point to the same index (scatter not
// possible due to race condition)
for (const unsigned int *indices = dof_indices;
indices != dof_indices + ndofs;
++indices)
{
bool is_sorted = true;
unsigned int previous = indices[0];
for (unsigned int l = 1; l < n_comp; ++l)
{
const unsigned int current = indices[l * ndofs];
if (current <= previous)
is_sorted = false;

// the simple check failed, must compare all
// indices manually - due to short sizes this
// O(n^2) algorithm is better than sorting
if (!is_sorted)
for (unsigned int j = 0; j < l; ++j)
if (indices[j * ndofs] == current)
{
index_storage_variants
[dof_access_cell][i] =
IndexStorageVariants::full;
break;
}
previous = current;
}
}
}
}
}
Expand Down Expand Up @@ -848,9 +869,14 @@ namespace internal
ndofs * vectorization_length,
this->dof_indices_interleaved.size() + 1);
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int j = 0; j < vectorization_length; ++j)
interleaved_dof_indices[k * vectorization_length + j] =
dof_indices[j * ndofs + k];
{
const unsigned int *my_dof_indices = dof_indices + k;
const unsigned int *end =
interleaved_dof_indices + vectorization_length;
for (; interleaved_dof_indices != end;
++interleaved_dof_indices, my_dof_indices += ndofs)
*interleaved_dof_indices = *my_dof_indices;
}
}
}

Expand Down

0 comments on commit 10aeb35

Please sign in to comment.