Skip to content

Commit

Permalink
Redo some indexing inside FESystem.
Browse files Browse the repository at this point in the history
This function takes up about 3% of the total runtime for an application I'm
working on. We can reduce that cost by nearly 50% by moving some checks outside
of loops and by replacing the hand-written for-loops with explicit calls to
std::copy(), which assumes that its input and output ranges are not aliased.
  • Loading branch information
drwells committed Mar 12, 2023
1 parent cd1cf88 commit d0455a5
Showing 1 changed file with 32 additions and 24 deletions.
56 changes: 32 additions & 24 deletions source/fe/fe_system.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,31 +147,39 @@ namespace internal
const unsigned int n_components = fe.element_multiplicity(base_no);
const unsigned int n_dofs_per_cell =
fe.base_element(base_no).n_dofs_per_cell();
for (unsigned int component = 0; component < n_components; ++component)
for (unsigned int b = 0; b < n_dofs_per_cell; ++b)
{
const unsigned int out_index = base_to_system_table[component][b];

if (base_flags & update_values)
for (unsigned int q = 0; q < n_q_points; ++q)
output_data.shape_values[out_index][q] =
base_data.shape_values[b][q];

if (base_flags & update_gradients)
for (unsigned int q = 0; q < n_q_points; ++q)
output_data.shape_gradients[out_index][q] =
base_data.shape_gradients[b][q];

if (base_flags & update_hessians)
for (unsigned int q = 0; q < n_q_points; ++q)
output_data.shape_hessians[out_index][q] =
base_data.shape_hessians[b][q];

if (base_flags & update_3rd_derivatives)
for (unsigned int q = 0; q < n_q_points; ++q)
output_data.shape_3rd_derivatives[out_index][q] =
base_data.shape_3rd_derivatives[b][q];
}
// Helper that copies the first n_q_points entries of row_in to the
// beginning of row_out using std::copy(). n_q_points is captured by
// reference from the enclosing scope.
//
// NOTE(review): row_out is taken by value, so the copy is only visible to
// the caller if the argument is a lightweight view/handle onto the output
// storage (presumably the row accessors of the shape_* tables) -- confirm.
auto copy_row = [&](const auto row_in, auto row_out) {
// std::copy() requires that the start of the destination does not lie
// inside the source range.
std::copy(row_in.begin(), row_in.begin() + n_q_points, row_out.begin());
};

if (base_flags & update_values)
for (unsigned int component = 0; component < n_components; ++component)
for (unsigned int b = 0; b < n_dofs_per_cell; ++b)
copy_row(
base_data.shape_values[b],
output_data.shape_values[base_to_system_table[component][b]]);

if (base_flags & update_gradients)
for (unsigned int component = 0; component < n_components; ++component)
for (unsigned int b = 0; b < n_dofs_per_cell; ++b)
copy_row(
base_data.shape_gradients[b],
output_data.shape_gradients[base_to_system_table[component][b]]);

if (base_flags & update_hessians)
for (unsigned int component = 0; component < n_components; ++component)
for (unsigned int b = 0; b < n_dofs_per_cell; ++b)
copy_row(
base_data.shape_hessians[b],
output_data.shape_hessians[base_to_system_table[component][b]]);

if (base_flags & update_3rd_derivatives)
for (unsigned int component = 0; component < n_components; ++component)
for (unsigned int b = 0; b < n_dofs_per_cell; ++b)
copy_row(
base_data.shape_3rd_derivatives[b],
output_data
.shape_3rd_derivatives[base_to_system_table[component][b]]);
}

/**
Expand Down

0 comments on commit d0455a5

Please sign in to comment.