Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEFaceEvaluation: Fix ECL for continuous elements #15527

Merged
merged 1 commit into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
179 changes: 115 additions & 64 deletions include/deal.II/matrix_free/fe_evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -3370,6 +3370,11 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
*this->dof_info);
}

const bool accesses_exterior_dofs =
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false;

// Case 2: contiguous indices which use reduced storage of indices and can
// use vectorized load/store operations -> go to separate function
if (this->cell != numbers::invalid_unsigned_int)
Expand All @@ -3380,9 +3385,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::

bool is_contiguous = true;
// check if exterior cells are not contiguous (ECL case)
if (is_face && !this->interior_face &&
(this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell))
if (accesses_exterior_dofs)
{
const std::array<unsigned int, VectorizedArrayType::size()> &cells =
this->get_cell_ids();
Expand Down Expand Up @@ -3451,7 +3454,8 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
internal::is_vectorizable<VectorType, Number>::value>
vector_selector;

const bool use_vectorized_path = !(masking_is_active || has_hn_constraints);
const bool use_vectorized_path =
!(masking_is_active || has_hn_constraints || accesses_exterior_dofs);

const std::size_t dofs_per_component = this->data->dofs_per_component_on_cell;
std::array<VectorizedArrayType *, n_components> values_dofs;
Expand Down Expand Up @@ -3845,17 +3849,17 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::

Assert(this->cell != numbers::invalid_unsigned_int, ExcNotImplemented());

const bool accesses_exterior_dofs =
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false;

// Simple case: We have contiguous storage, so we can simply copy out the
// data
if ((dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
interleaved_contiguous &&
n_lanes == VectorizedArrayType::size()) &&
!(is_face &&
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false) &&
!(!is_face && !this->is_interior_face()))
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
interleaved_contiguous &&
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
const unsigned int dof_index =
dof_indices_cont[this->cell * VectorizedArrayType::size()] +
Expand Down Expand Up @@ -3887,12 +3891,6 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
const unsigned int n_filled_lanes =
dof_info.n_vectorization_lanes_filled[ind][this->cell];

const bool is_ecl =
(this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false) ||
(!is_face && !this->is_interior_face());

if (vectors_sm[0] != nullptr)
{
const auto compute_vector_ptrs = [&](const unsigned int comp) {
Expand Down Expand Up @@ -3941,7 +3939,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
};

if (n_filled_lanes == VectorizedArrayType::size() &&
n_lanes == VectorizedArrayType::size() && !is_ecl)
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
if (n_components == 1 || this->n_fe_components == 1)
{
Expand Down Expand Up @@ -4018,7 +4016,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
// In the case with contiguous cell indices, we know that there are no
// constraints and that the indices within each element are contiguous
if (n_filled_lanes == VectorizedArrayType::size() &&
n_lanes == VectorizedArrayType::size() && !is_ecl)
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
Expand Down Expand Up @@ -4107,56 +4105,109 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_empty(values_dofs[comp][i]);
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
if (accesses_exterior_dofs)
{
const unsigned int *offsets =
&dof_info.dof_indices_interleave_strides
[ind][VectorizedArrayType::size() * this->cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
AssertIndexRange(offsets[v], VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
{
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offsets[v],
*src[comp],
values_dofs[comp][i][v]);
if (dof_info.index_storage_variants
[ind][cells[v] / VectorizedArrayType::size()] ==
internal::MatrixFreeFunctions::DoFInfo::
IndexStorageVariants::contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
{
const unsigned int offset =
dof_info.dof_indices_interleave_strides[ind][cells[v]];
AssertIndexRange(offset, VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offset,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(
dof_indices[v] +
(i + comp * dofs_per_component) * offset,
*src[0],
values_dofs[comp][i][v]);
}
}
}
}
else
{
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
{
const unsigned int *offsets =
&dof_info.dof_indices_interleave_strides
[ind][VectorizedArrayType::size() * this->cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] +
(i + comp * dofs_per_component) *
offsets[v],
*src[0],
values_dofs[comp][i][v]);
AssertIndexRange(offsets[v], VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
for (unsigned int v = 0; v < n_filled_lanes; ++v)
{
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offsets[v],
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(
dof_indices[v] +
(i + comp * dofs_per_component) * offsets[v],
*src[0],
values_dofs[comp][i][v]);
}
}
}
}
Expand Down
51 changes: 51 additions & 0 deletions tests/matrix_free/ecl_05.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// ---------------------------------------------------------------------
//
// Copyright (C) 2020 - 2022 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#include <deal.II/base/quadrature_lib.h>

#include <deal.II/dofs/dof_handler.h>

#include <deal.II/fe/fe_dgq.h>
#include <deal.II/fe/mapping_q.h>

#include <deal.II/grid/grid_generator.h>

#include <deal.II/matrix_free/fe_evaluation.h>
#include <deal.II/matrix_free/matrix_free.h>
#include <deal.II/matrix_free/tools.h>

#include <deal.II/multigrid/mg_constrained_dofs.h>

#include <deal.II/numerics/vector_tools.h>

#include "ecl_fe_q.h"


// Compare the evaluation of a SIP Laplace operator with face- and element-
// centric loops on a hypercube, hypershell, and hyperball for FE_Q.
// Adapted from ecl_01.cc

int
main(int argc, char **argv)
{
Utilities::MPI::MPI_InitFinalize mpi_init(argc, argv, 1);
tamiko marked this conversation as resolved.
Show resolved Hide resolved

mpi_initlog();
deallog.precision(10);

test<2, 2, 3, double, VectorizedArray<double>>(0);
test<2, 2, 3, double, VectorizedArray<double>>(1);
test<2, 2, 3, double, VectorizedArray<double>>(2);
}