Skip to content

Commit

Permalink
Merge pull request #15527 from bergbauer/fix_ecl
Browse files Browse the repository at this point in the history
FEFaceEvaluation: Fix ECL for continuous elements
  • Loading branch information
tamiko committed Jul 4, 2023
2 parents caca3d3 + 72c2906 commit 2050f26
Show file tree
Hide file tree
Showing 4 changed files with 543 additions and 64 deletions.
179 changes: 115 additions & 64 deletions include/deal.II/matrix_free/fe_evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -3289,6 +3289,11 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
*this->dof_info);
}

const bool accesses_exterior_dofs =
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false;

// Case 2: contiguous indices which use reduced storage of indices and can
// use vectorized load/store operations -> go to separate function
if (this->cell != numbers::invalid_unsigned_int)
Expand All @@ -3299,9 +3304,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::

bool is_contiguous = true;
// check if exterior cells are not contiguous (ECL case)
if (is_face && !this->interior_face &&
(this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell))
if (accesses_exterior_dofs)
{
const std::array<unsigned int, VectorizedArrayType::size()> &cells =
this->get_cell_ids();
Expand Down Expand Up @@ -3370,7 +3373,8 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
internal::is_vectorizable<VectorType, Number>::value>
vector_selector;

const bool use_vectorized_path = !(masking_is_active || has_hn_constraints);
const bool use_vectorized_path =
!(masking_is_active || has_hn_constraints || accesses_exterior_dofs);

const std::size_t dofs_per_component = this->data->dofs_per_component_on_cell;
std::array<VectorizedArrayType *, n_components> values_dofs;
Expand Down Expand Up @@ -3764,17 +3768,17 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::

Assert(this->cell != numbers::invalid_unsigned_int, ExcNotImplemented());

const bool accesses_exterior_dofs =
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false;

// Simple case: We have contiguous storage, so we can simply copy out the
// data
if ((dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
interleaved_contiguous &&
n_lanes == VectorizedArrayType::size()) &&
!(is_face &&
this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false) &&
!(!is_face && !this->is_interior_face()))
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
interleaved_contiguous &&
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
const unsigned int dof_index =
dof_indices_cont[this->cell * VectorizedArrayType::size()] +
Expand Down Expand Up @@ -3806,12 +3810,6 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
const unsigned int n_filled_lanes =
dof_info.n_vectorization_lanes_filled[ind][this->cell];

const bool is_ecl =
(this->dof_access_index ==
internal::MatrixFreeFunctions::DoFInfo::dof_access_cell &&
this->is_interior_face() == false) ||
(!is_face && !this->is_interior_face());

if (vectors_sm[0] != nullptr)
{
const auto compute_vector_ptrs = [&](const unsigned int comp) {
Expand Down Expand Up @@ -3860,7 +3858,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
};

if (n_filled_lanes == VectorizedArrayType::size() &&
n_lanes == VectorizedArrayType::size() && !is_ecl)
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
if (n_components == 1 || this->n_fe_components == 1)
{
Expand Down Expand Up @@ -3937,7 +3935,7 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
// In the case with contiguous cell indices, we know that there are no
// constraints and that the indices within each element are contiguous
if (n_filled_lanes == VectorizedArrayType::size() &&
n_lanes == VectorizedArrayType::size() && !is_ecl)
n_lanes == VectorizedArrayType::size() && !accesses_exterior_dofs)
{
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
Expand Down Expand Up @@ -4026,56 +4024,109 @@ FEEvaluationBase<dim, n_components_, Number, is_face, VectorizedArrayType>::
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_empty(values_dofs[comp][i]);
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
if (accesses_exterior_dofs)
{
const unsigned int *offsets =
&dof_info.dof_indices_interleave_strides
[ind][VectorizedArrayType::size() * this->cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
AssertIndexRange(offsets[v], VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
{
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offsets[v],
*src[comp],
values_dofs[comp][i][v]);
if (dof_info.index_storage_variants
[ind][cells[v] / VectorizedArrayType::size()] ==
internal::MatrixFreeFunctions::DoFInfo::
IndexStorageVariants::contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
{
const unsigned int offset =
dof_info.dof_indices_interleave_strides[ind][cells[v]];
AssertIndexRange(offset, VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offset,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(
dof_indices[v] +
(i + comp * dofs_per_component) * offset,
*src[0],
values_dofs[comp][i][v]);
}
}
}
}
else
{
if (dof_info.index_storage_variants[ind][this->cell] ==
internal::MatrixFreeFunctions::DoFInfo::IndexStorageVariants::
contiguous)
{
if (n_components == 1 || this->n_fe_components == 1)
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i,
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i +
comp * dofs_per_component,
*src[0],
values_dofs[comp][i][v]);
}
}
else
{
const unsigned int *offsets =
&dof_info.dof_indices_interleave_strides
[ind][VectorizedArrayType::size() * this->cell];
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] +
(i + comp * dofs_per_component) *
offsets[v],
*src[0],
values_dofs[comp][i][v]);
AssertIndexRange(offsets[v], VectorizedArrayType::size() + 1);
if (n_components == 1 || this->n_fe_components == 1)
for (unsigned int v = 0; v < n_filled_lanes; ++v)
{
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(dof_indices[v] + i * offsets[v],
*src[comp],
values_dofs[comp][i][v]);
}
else
{
for (unsigned int v = 0; v < n_filled_lanes; ++v)
if (mask[v] == true)
for (unsigned int i = 0; i < dofs_per_component; ++i)
operation.process_dof(
dof_indices[v] +
(i + comp * dofs_per_component) * offsets[v],
*src[0],
values_dofs[comp][i][v]);
}
}
}
}
Expand Down
51 changes: 51 additions & 0 deletions tests/matrix_free/ecl_05.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// ---------------------------------------------------------------------
//
// Copyright (C) 2020 - 2022 by the deal.II authors
//
// This file is part of the deal.II library.
//
// The deal.II library is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE.md at
// the top level directory of deal.II.
//
// ---------------------------------------------------------------------

#include <deal.II/base/quadrature_lib.h>

#include <deal.II/dofs/dof_handler.h>

#include <deal.II/fe/fe_dgq.h>
#include <deal.II/fe/mapping_q.h>

#include <deal.II/grid/grid_generator.h>

#include <deal.II/matrix_free/fe_evaluation.h>
#include <deal.II/matrix_free/matrix_free.h>
#include <deal.II/matrix_free/tools.h>

#include <deal.II/multigrid/mg_constrained_dofs.h>

#include <deal.II/numerics/vector_tools.h>

#include "ecl_fe_q.h"


// Compare the evaluation of a SIP Laplace operator with face- and element-
// centric loops on a hypercube, hypershell, and hyperball for FE_Q.
// Adapted from ecl_01.cc

int
main(int argc, char **argv)
{
Utilities::MPI::MPI_InitFinalize mpi_init(argc, argv, 1);

mpi_initlog();
deallog.precision(10);

test<2, 2, 3, double, VectorizedArray<double>>(0);
test<2, 2, 3, double, VectorizedArray<double>>(1);
test<2, 2, 3, double, VectorizedArray<double>>(2);
}

0 comments on commit 2050f26

Please sign in to comment.