From dd9b0c9b84f4227098cb38c5a51ae92c2124e42a Mon Sep 17 00:00:00 2001 From: Nikita Kniazev Date: Sun, 14 Apr 2019 04:26:31 +0300 Subject: [PATCH] Partly mitigate bad Clang inlining decision Because a visitor is wrapped several times during visitation it causes extra temporaries usage and useless stores and loads that can only be optimized if the `visitation_impl` is inlined into the function that creates the wrapper. Clang inliner decides not to inline functions even with small-sized switches, resulting in poor visitation code. Forceinline mark on those internal functions perceptibly improves the situation, though does not mitigate it completely. LLVM ticket https://bugs.llvm.org/show_bug.cgi?id=41491 --- include/boost/variant/detail/visitation_impl.hpp | 2 +- include/boost/variant/variant.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/boost/variant/detail/visitation_impl.hpp b/include/boost/variant/detail/visitation_impl.hpp index c7250c30..69b7a2b4 100644 --- a/include/boost/variant/detail/visitation_impl.hpp +++ b/include/boost/variant/detail/visitation_impl.hpp @@ -190,7 +190,7 @@ template < , typename Visitor, typename VoidPtrCV , typename NoBackupFlag > -inline typename Visitor::result_type +BOOST_FORCEINLINE typename Visitor::result_type visitation_impl( const int internal_which, const int logical_which , Visitor& visitor, VoidPtrCV storage diff --git a/include/boost/variant/variant.hpp b/include/boost/variant/variant.hpp index bf4128cd..5c199e83 100644 --- a/include/boost/variant/variant.hpp +++ b/include/boost/variant/variant.hpp @@ -2355,7 +2355,7 @@ class variant #endif// !defined(BOOST_NO_MEMBER_TEMPLATE_FRIENDS) template - static typename Visitor::result_type + BOOST_FORCEINLINE static typename Visitor::result_type internal_apply_visitor_impl( int internal_which , int logical_which @@ -2380,7 +2380,7 @@ class variant } template - typename Visitor::result_type + BOOST_FORCEINLINE typename Visitor::result_type 
internal_apply_visitor(Visitor& visitor) { return internal_apply_visitor_impl( @@ -2389,7 +2389,7 @@ class variant } template - typename Visitor::result_type + BOOST_FORCEINLINE typename Visitor::result_type internal_apply_visitor(Visitor& visitor) const { return internal_apply_visitor_impl(