Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MatrixFree DoFInfo: Break out from some expensive loops #14115

Merged
merged 1 commit into from
Jul 20, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 44 additions & 18 deletions source/matrix_free/dof_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,9 @@ namespace internal
const unsigned int *dof_indices =
this->dof_indices.data() +
row_starts[i * vectorization_length * n_components].first;
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int k = 0;
k < ndofs && indices_are_interleaved_and_contiguous;
++k)
for (unsigned int j = 0; j < n_comp; ++j)
if (dof_indices[j * ndofs + k] !=
dof_indices[0] + k * n_comp + j)
Expand Down Expand Up @@ -663,7 +665,9 @@ namespace internal
for (unsigned int j = 0; j < n_comp; ++j)
offsets[j] =
dof_indices[j * ndofs + 1] - dof_indices[j * ndofs];
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int k = 0;
k < ndofs && indices_are_interleaved_and_mixed != 0;
++k)
for (unsigned int j = 0; j < n_comp; ++j)
// the first if case is to avoid negative offsets
// (invalid)
Expand Down Expand Up @@ -712,19 +716,36 @@ namespace internal
index_storage_variants[dof_access_cell][i] =
IndexStorageVariants::full;

// do not use interleaved storage if two vectorized
// components point to the same field (scatter not
// possible)
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int l = 0; l < n_comp; ++l)
for (unsigned int j = l + 1; j < n_comp; ++j)
if (dof_indices[j * ndofs + k] ==
dof_indices[l * ndofs + k])
{
index_storage_variants[dof_access_cell][i] =
IndexStorageVariants::full;
break;
}
// do not use interleaved storage if two entries within
// vectorized array point to the same index (scatter not
// possible due to race condition)
for (const unsigned int *indices = dof_indices;
indices != dof_indices + ndofs;
++indices)
Comment on lines +722 to +724
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that I could also break out of this loop, but the break here is a very unlikely case, so I consider it more expensive to add a check than continuing. Of course, if we want code symmetry we might still consider having it here as well.

{
bool is_sorted = true;
unsigned int previous = indices[0];
for (unsigned int l = 1; l < n_comp; ++l)
{
const unsigned int current = indices[l * ndofs];
if (current <= previous)
is_sorted = false;

// the simple check failed, must compare all
// indices manually - due to short sizes this
// O(n^2) algorithm is better than sorting
if (!is_sorted)
for (unsigned int j = 0; j < l; ++j)
if (indices[j * ndofs] == current)
{
index_storage_variants
[dof_access_cell][i] =
IndexStorageVariants::full;
break;
}
previous = current;
}
}
}
}
}
Expand Down Expand Up @@ -848,9 +869,14 @@ namespace internal
ndofs * vectorization_length,
this->dof_indices_interleaved.size() + 1);
for (unsigned int k = 0; k < ndofs; ++k)
for (unsigned int j = 0; j < vectorization_length; ++j)
interleaved_dof_indices[k * vectorization_length + j] =
dof_indices[j * ndofs + k];
{
const unsigned int *my_dof_indices = dof_indices + k;
const unsigned int *end =
interleaved_dof_indices + vectorization_length;
for (; interleaved_dof_indices != end;
++interleaved_dof_indices, my_dof_indices += ndofs)
*interleaved_dof_indices = *my_dof_indices;
}
}
}

Expand Down