Avoid doing extra TLS lookups in FunctionParser. #13745

Merged · merged 1 commit on May 19, 2022
5 changes: 5 additions & 0 deletions doc/news/changes/minor/20220516DavidWells
@@ -0,0 +1,5 @@
Improved: The thread-local storage in FunctionParser has been reworked to avoid
extra locking and unlocking calls. This makes evaluations of simple expressions
about three times faster.
<br>
(David Wells, 2022/05/16)
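The mechanism behind that speed-up, as a minimal standalone sketch (the `ThreadLocalStorage`, `Parser`, and evaluator classes below are simplified stand-ins, not the actual deal.II or muParser types): when each piece of per-thread state lives in its own thread-local container, every evaluation has to look each of them up separately, and each lookup in a map-based thread-local store costs a lock/unlock pair. Bundling all mutable state into one struct reduces that to a single lookup per call.

```cpp
#include <map>
#include <mutex>
#include <thread>
#include <vector>

// Simplified stand-in for Threads::ThreadLocalStorage: every get() takes a
// lock to find (or create) the calling thread's copy, so each additional
// thread-local member costs one more lock/unlock pair per evaluation.
template <typename T>
class ThreadLocalStorage
{
public:
  T &get()
  {
    const std::lock_guard<std::mutex> guard(mutex);
    return storage[std::this_thread::get_id()]; // default-constructed on first use
  }

private:
  std::mutex                   mutex;
  std::map<std::thread::id, T> storage;
};

struct Parser // stand-in for the muParser object
{
  double Eval() const { return 42.0; }
};

// Old layout: two thread-local members -> two locked lookups per call.
class EvaluatorBefore
{
public:
  double value(const unsigned int component)
  {
    std::vector<double> &v = vars.get();    // locked lookup #1
    v.assign(1, 1.0);
    std::vector<Parser> &p = parsers.get(); // locked lookup #2
    if (p.size() <= component)
      p.resize(component + 1);
    return p[component].Eval();
  }

private:
  ThreadLocalStorage<std::vector<double>> vars;
  ThreadLocalStorage<std::vector<Parser>> parsers;
};

// New layout: one struct holding all mutable per-thread state -> one lookup.
class EvaluatorAfter
{
public:
  double value(const unsigned int component)
  {
    Data &data = per_thread_data.get();     // the only locked lookup
    data.vars.assign(1, 1.0);
    if (data.parsers.size() <= component)
      data.parsers.resize(component + 1);
    return data.parsers[component].Eval();
  }

private:
  struct Data
  {
    std::vector<double> vars;
    std::vector<Parser> parsers;
  };
  ThreadLocalStorage<Data> per_thread_data;
};

int main()
{
  EvaluatorBefore before;
  EvaluatorAfter  after;
  // Both produce the same result; the "after" layout just gets there with
  // half as many thread-local lookups.
  return (before.value(0) == after.value(0)) ? 0 : 1;
}
```

The real Threads::ThreadLocalStorage is more elaborate than the map-plus-mutex stand-in here, but the cost model the changelog entry describes — a locking lookup per `get()` — is what the single `ParserData` struct below is designed to amortize.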
42 changes: 35 additions & 7 deletions include/deal.II/base/function_parser.h
@@ -396,19 +396,48 @@ class FunctionParser : public AutoDerivativeFunction<dim>
//@}

private:
#ifdef DEAL_II_WITH_MUPARSER
/**
* Place for the variables for each thread
* Class containing the mutable state required by muParser.
*
* @note For performance reasons it is best to put all mutable state in a
* single object so that, for each function call, we only need to get
* thread-local data exactly once.
*/
mutable Threads::ThreadLocalStorage<std::vector<double>> vars;
struct ParserData
{
/**
* Default constructor. Threads::ThreadLocalStorage requires that objects be
* either default- or copy-constructible: make sure we satisfy the first
* case by declaring it here.
*/
ParserData() = default;

/**
* std::is_copy_constructible gives the wrong answer for containers with
* non-copy constructible types (e.g., std::vector<std::unique_ptr<int>>) -
* for more information, see the documentation of
* Threads::ThreadLocalStorage. Hence, to avoid compilation failures, just
* delete the copy constructor completely.
*/
ParserData(const ParserData &) = delete;

/**
* Scratch array used to set independent variables (i.e., x, y, and t)
* before each muParser call.
*/
std::vector<double> vars;

/**
* The actual muParser parser objects (hidden with PIMPL).
*/
std::vector<std::unique_ptr<internal::muParserBase>> parsers;
};

/**
* The muParser objects (hidden with the PIMPL idiom) for each thread (and one
* for each component).
*/
mutable Threads::ThreadLocalStorage<
std::vector<std::unique_ptr<internal::muParserBase>>>
fp;
mutable Threads::ThreadLocalStorage<ParserData> parser_data;

/**
* An array to keep track of all the constants, required to initialize fp in
@@ -430,7 +459,6 @@ class FunctionParser : public AutoDerivativeFunction<dim>
*/
void
init_muparser() const;
#endif

/**
* An array of function expressions (one per component), required to
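The comment on the deleted copy constructor above points at a standard-library quirk that is easy to verify in isolation. The following is plain standard C++ (not deal.II code) showing that `std::is_copy_constructible` reports `true` for `std::vector<std::unique_ptr<int>>` even though actually copying such a vector does not compile, and that explicitly deleting the copy constructor makes the trait give the answer that trait-dispatching code such as `Threads::ThreadLocalStorage` needs.

```cpp
#include <memory>
#include <type_traits>
#include <vector>

// The trait says "copyable" because std::vector declares its copy
// constructor unconditionally; actually invoking it for a vector of
// move-only elements is a hard compile error.
static_assert(
  std::is_copy_constructible<std::vector<std::unique_ptr<int>>>::value,
  "declared copyable, even though a copy would not compile");

// Explicitly deleting the copy constructor makes the trait report the
// truth, so trait-based code (e.g., Threads::ThreadLocalStorage) can take
// the default-construct-per-thread path instead of trying to copy an
// exemplar object.
struct ParserDataSketch
{
  ParserDataSketch()                         = default;
  ParserDataSketch(const ParserDataSketch &) = delete;

  std::vector<double>               vars;
  std::vector<std::unique_ptr<int>> parsers; // stand-in for parser objects
};

static_assert(!std::is_copy_constructible<ParserDataSketch>::value,
              "the deleted copy constructor is now visible to the trait");

int main()
{}
```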
42 changes: 35 additions & 7 deletions include/deal.II/base/tensor_function_parser.h
@@ -262,19 +262,48 @@ class TensorFunctionParser : public TensorFunction<rank, dim, Number>
//@}

private:
#ifdef DEAL_II_WITH_MUPARSER
/**
* Place for the variables for each thread
* Class containing the mutable state required by muParser.
*
* @note For performance reasons it is best to put all mutable state in a
* single object so that, for each function call, we only need to get
* thread-local data exactly once.
*/
mutable Threads::ThreadLocalStorage<std::vector<double>> vars;
struct ParserData
{
/**
* Default constructor. Threads::ThreadLocalStorage requires that objects be
* either default- or copy-constructible: make sure we satisfy the first
* case by declaring it here.
*/
ParserData() = default;

/**
* std::is_copy_constructible gives the wrong answer for containers with
* non-copy constructible types (e.g., std::vector<std::unique_ptr<int>>) -
* for more information, see the documentation of
* Threads::ThreadLocalStorage. Hence, to avoid compilation failures, just
* delete the copy constructor completely.
*/
ParserData(const ParserData &) = delete;

/**
* Scratch array used to set independent variables (i.e., x, y, and t)
* before each muParser call.
*/
std::vector<double> vars;

/**
* The actual muParser parser objects (hidden with PIMPL).
*/
std::vector<std::unique_ptr<internal::muParserBase>> parsers;
};

/**
* The muParser objects (hidden with the PIMPL idiom) for each thread (and one
* for each component).
*/
mutable Threads::ThreadLocalStorage<
std::vector<std::unique_ptr<internal::muParserBase>>>
tfp;
mutable Threads::ThreadLocalStorage<ParserData> parser_data;

/**
* An array to keep track of all the constants, required to initialize tfp in
@@ -296,7 +325,6 @@ class TensorFunctionParser : public TensorFunction<rank, dim, Number>
*/
void
init_muparser() const;
#endif

/**
* An array of function expressions (one per component), required to
46 changes: 23 additions & 23 deletions source/base/function_parser.cc
@@ -87,7 +87,7 @@ FunctionParser<dim>::initialize(const std::string & variables,
const std::map<std::string, double> &constants,
const bool time_dependent)
{
this->fp.clear(); // this will reset all thread-local objects
this->parser_data.clear(); // this will reset all thread-local objects

this->constants = constants;
this->var_names = Utilities::split_string_list(variables, ',');
@@ -158,27 +158,25 @@ FunctionParser<dim>::init_muparser() const
// check that we have not already initialized the parser on the
// current thread, i.e., that the current function is only called
// once per thread
Assert(fp.get().size() == 0, ExcInternalError());
ParserData &data = parser_data.get();
Assert(data.parsers.size() == 0 && data.vars.size() == 0, ExcInternalError());

// initialize the objects for the current thread (fp.get() and
// vars.get())
fp.get().reserve(this->n_components);
vars.get().resize(var_names.size());
// initialize the objects for the current thread
data.parsers.reserve(this->n_components);
data.vars.resize(var_names.size());
for (unsigned int component = 0; component < this->n_components; ++component)
{
fp.get().emplace_back(
std::make_unique<internal::FunctionParserImplementation::Parser>());
data.parsers.emplace_back(
new internal::FunctionParserImplementation::Parser());
mu::Parser &parser =
dynamic_cast<internal::FunctionParserImplementation::Parser &>(
*fp.get().back());
*data.parsers.back());

for (const auto &constant : constants)
{
parser.DefineConst(constant.first, constant.second);
}
parser.DefineConst(constant.first, constant.second);

for (unsigned int iv = 0; iv < var_names.size(); ++iv)
parser.DefineVar(var_names[iv], &vars.get()[iv]);
parser.DefineVar(var_names[iv], &data.vars[iv]);

// define some compatibility functions:
parser.DefineFun("if", internal::FunctionParser::mu_if, true);
@@ -282,25 +280,26 @@ FunctionParser<dim>::value(const Point<dim> & p,
AssertIndexRange(component, this->n_components);

// initialize the parser if that hasn't happened yet on the current thread
if (fp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

try
{
Assert(dynamic_cast<internal::FunctionParserImplementation::Parser *>(
fp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
// using dynamic_cast in the next line is about 6% slower than
// static_cast, so use the assertion above for debugging and disable
// clang-tidy:
mu::Parser &parser =
static_cast<internal::FunctionParserImplementation::Parser &>( // NOLINT
*fp.get()[component]);
*data.parsers[component]);
return parser.Eval();
}
catch (mu::ParserError &e)
@@ -328,23 +327,24 @@ FunctionParser<dim>::vector_value(const Point<dim> &p,


// initialize the parser if that hasn't happened yet on the current thread
if (fp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

for (unsigned int component = 0; component < this->n_components; ++component)
{
// Same comment in value() applies here too:
Assert(dynamic_cast<internal::FunctionParserImplementation::Parser *>(
fp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
mu::Parser &parser =
static_cast<internal::FunctionParserImplementation::Parser &>( // NOLINT
*fp.get()[component]);
*data.parsers[component]);

values(component) = parser.Eval();
}
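For context, a typical use of the class whose hot path these changes touch — a hypothetical example, not taken from the PR; the expression and constants are purely illustrative. Every `value()` call below now fetches its thread-local `ParserData` exactly once instead of performing separate lookups for the variable scratch array and the parser objects.

```cpp
#include <deal.II/base/function_parser.h>
#include <deal.II/base/point.h>

#include <iostream>

int main()
{
  using namespace dealii;

  // A scalar function of (x, y) with one user-defined constant.
  FunctionParser<2> fp(/*n_components=*/1);
  fp.initialize("x,y",                        // variable names
                "sin(pi*x)*cos(pi*y)",        // the expression to parse
                {{"pi", 3.141592653589793}}); // constants

  // Each evaluation now performs a single thread-local lookup to obtain
  // both the scratch variables and the parser object.
  const Point<2> p(0.25, 0.75);
  std::cout << fp.value(p) << std::endl;
}
```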
50 changes: 24 additions & 26 deletions source/base/tensor_function_parser.cc
@@ -14,6 +14,7 @@
// ---------------------------------------------------------------------


#include <deal.II/base/array_view.h>
#include <deal.II/base/mu_parser_internal.h>
#include <deal.II/base/patterns.h>
#include <deal.II/base/tensor.h>
@@ -87,7 +88,7 @@ TensorFunctionParser<rank, dim, Number>::initialize(
const std::map<std::string, double> &constants,
const bool time_dependent)
{
this->tfp.clear(); // this will reset all thread-local objects
this->parser_data.clear(); // this will reset all thread-local objects

this->constants = constants;
this->var_names = Utilities::split_string_list(variables, ',');
@@ -172,28 +173,26 @@ TensorFunctionParser<rank, dim, Number>::init_muparser() const
// check that we have not already initialized the parser on the
// current thread, i.e., that the current function is only called
// once per thread
Assert(tfp.get().size() == 0, ExcInternalError());
ParserData &data = parser_data.get();
Assert(data.parsers.size() == 0 && data.vars.size() == 0, ExcInternalError());

// initialize the objects for the current thread (tfp.get() and
// vars.get())
tfp.get().reserve(this->n_components);
vars.get().resize(var_names.size());
// initialize the objects for the current thread
data.parsers.reserve(this->n_components);
data.vars.resize(var_names.size());
for (unsigned int component = 0; component < this->n_components; ++component)
{
tfp.get().emplace_back(
data.parsers.emplace_back(
std::make_unique<
internal::TensorFunctionParserImplementation::Parser>());
mu::Parser &parser =
dynamic_cast<internal::TensorFunctionParserImplementation::Parser &>(
*tfp.get().back());
*data.parsers.back());

for (const auto &constant : constants)
{
parser.DefineConst(constant.first, constant.second);
}
parser.DefineConst(constant.first, constant.second);

for (unsigned int iv = 0; iv < var_names.size(); ++iv)
parser.DefineVar(var_names[iv], &vars.get()[iv]);
parser.DefineVar(var_names[iv], &data.vars[iv]);

// define some compatibility functions:
parser.DefineFun("if", internal::FunctionParser::mu_if, true);
@@ -219,7 +218,7 @@ TensorFunctionParser<rank, dim, Number>::init_muparser() const
// space between the name of the function and the opening
// parenthesis. this is awkward because it is not backward
// compatible to the library we used to use before muparser
// (the tfparser library) but also makes no real sense.
// (the fparser library) but also makes no real sense.
// consequently, in the expressions we set, remove any space
// we may find after function names
std::string transformed_expression = expressions[component];
@@ -295,32 +294,30 @@ TensorFunctionParser<rank, dim, Number>::value(const Point<dim> &p) const
Assert(initialized == true, ExcNotInitialized());

// initialize the parser if that hasn't happened yet on the current thread
if (tfp.get().size() == 0)
ParserData &data = parser_data.get();
if (data.vars.size() == 0)
init_muparser();

for (unsigned int i = 0; i < dim; ++i)
vars.get()[i] = p(i);
data.vars[i] = p(i);
if (dim != n_vars)
vars.get()[dim] = this->get_time();
data.vars[dim] = this->get_time();

// initialize tensor with zeros
Tensor<rank, dim, Number> value;
std::array<Number, Tensor<rank, dim, Number>::n_independent_components>
values;

try
{
unsigned int component = 0;
for (Number *value_ptr = value.begin_raw(); value_ptr != value.end_raw();
++value_ptr)
for (unsigned int component = 0; component < values.size(); ++component)
{
Assert(dynamic_cast<
internal::TensorFunctionParserImplementation::Parser *>(
tfp.get()[component].get()),
data.parsers[component].get()),
ExcInternalError());
mu::Parser &parser = static_cast< // NOLINT
internal::TensorFunctionParserImplementation::Parser &>(
*tfp.get()[component]);
*value_ptr = parser.Eval();
++component;
*data.parsers[component]);
values[component] = parser.Eval();
} // for
} // try
catch (mu::ParserError &e)
@@ -335,7 +332,8 @@ TensorFunctionParser<rank, dim, Number>::value(const Point<dim> &p) const
return Tensor<rank, dim, Number>();
} // catch

return value;
return Tensor<rank, dim, Number>(
make_array_view(values.begin(), values.end()));
}


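The last hunk also replaces the raw-pointer iteration over the tensor's storage with filling a `std::array` and building the `Tensor` from an `ArrayView` in one step. A small sketch of that construction pattern in isolation (assuming a deal.II installation; the values are arbitrary):

```cpp
#include <deal.II/base/array_view.h>
#include <deal.II/base/tensor.h>

#include <array>
#include <iostream>

int main()
{
  using namespace dealii;

  // A rank-1 tensor in 3d has three independent components. Fill them in a
  // plain std::array first, then construct the Tensor in one step from an
  // ArrayView -- the same pattern the rewritten value() function uses above.
  std::array<double, Tensor<1, 3>::n_independent_components> values = {
    {1.0, 2.0, 3.0}};

  const Tensor<1, 3> t(make_array_view(values.begin(), values.end()));
  std::cout << t << std::endl; // prints the three components
}
```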