fixed thread-safety issues with the core ints in TwoBodyEngine<cGTG_t…

…imes_Coulomb>
evaleev · Mar 15, 2016 · 397043b · mbanck · Oct 1, 2016 · 397043b
1 parent b7300b5
commit 397043b
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 39 deletions.
diff --git a/include/libint2/boys.h b/include/libint2/boys.h
@@ -1316,27 +1316,28 @@ namespace libint2 {
   };
 #endif
 
-  //////////////////////////////////////////////////////////
-  /// core integrals r12^k \sum_i \exp(- a_i r_12^2)
-  //////////////////////////////////////////////////////////
-
-  /// each thread needs its own scratch if k==-1
-  template <typename Real, int k> struct GaussianGmEvalScratch {
-      GaussianGmEvalScratch() = default;
-      GaussianGmEvalScratch(int) { }
-  };
-  /// create this object for each thread that needs to use GaussianGmEval<Real,-1>
-  template <typename Real>
-  struct GaussianGmEvalScratch<Real, -1> {
+  template<typename Real, int k>
+    struct GaussianGmEval;
+
+  namespace detail {
+
+    /// some evaluators need thread-local scratch, but most don't
+    template <typename CoreEval> struct CoreEvalScratch {
+        CoreEvalScratch() = default;
+        CoreEvalScratch(int) { }
+    };
+    /// GaussianGmEval<Real,-1> needs extra scratch data
+    template <typename Real>
+    struct CoreEvalScratch<GaussianGmEval<Real, -1>> {
       std::vector<Real> Fm_;
       std::vector<Real> g_i;
       std::vector<Real> r_i;
       std::vector<Real> oorhog_i;
-      GaussianGmEvalScratch() = default;
-      GaussianGmEvalScratch(int mmax) {
+      CoreEvalScratch() = default;
+      CoreEvalScratch(int mmax) {
         init(mmax);
       }
-    private:
+      private:
       void init(int mmax) {
         Fm_.resize(mmax+1);
         g_i.resize(mmax+1);
@@ -1345,8 +1346,12 @@ namespace libint2 {
         g_i[0] = 1.0;
         r_i[0] = 1.0;
       }
-  };
+    };
+  } // namespace libint2::detail
 
+  //////////////////////////////////////////////////////////
+  /// core integrals r12^k \sum_i \exp(- a_i r_12^2)
+  //////////////////////////////////////////////////////////
 
   /**
    * Evaluates core integral \$ G_m(\rho, T) = \left( - \frac{\partial}{\partial T} \right)^n G_0(\rho,T) \f$,
@@ -1370,27 +1375,25 @@ namespace libint2 {
    * \note for more details see DOI: 10.1039/b605188j
    */
   template<typename Real, int k>
-  struct GaussianGmEval : private GaussianGmEvalScratch<Real, k> // N.B. empty-base optimization
+  struct GaussianGmEval : private detail::CoreEvalScratch<GaussianGmEval<Real,k>> // N.B. empty-base optimization
   {
 
       /**
        * @param[in] mmax the evaluator will be used to compute Gm(T) for 0 <= m <= mmax
        */
       GaussianGmEval(int mmax, Real precision) :
-          GaussianGmEvalScratch<Real, k>(mmax), mmax_(mmax),
-          precision_(precision), fm_eval_(0),
+          detail::CoreEvalScratch<GaussianGmEval<Real, k>>(mmax), mmax_(mmax),
+          precision_(precision), fm_eval_(),
           numbers_(-1,-1,mmax) {
         assert(k == -1 || k == 0 || k == 2);
         // for k=-1 need to evaluate the Boys function
         if (k == -1) {
-          fm_eval_ = new FmEval_Taylor<Real>(mmax_, precision_);
-//          fm_eval_ = new FmEval_Chebyshev3(mmax_);
+          fm_eval_ = FmEval_Taylor<Real>::instance(mmax_, precision_);
+//          fm_eval_ = FmEval_Chebyshev3::instance(mmax_);
         }
       }
 
       ~GaussianGmEval() {
-        delete fm_eval_;
-        fm_eval_ = 0;
       }
 
       // some features require at least C++11
@@ -1429,7 +1432,8 @@ namespace libint2 {
        * @param[in] mmax mmax the maximum value of m for which Boys function will be computed;
        *                 it must be <= the value returned by max_m() (this is not checked)
        * @param[in] geminal the Gaussian geminal for which the core integral \f$ Gm(\rho, T) \f$ is computed
-       * @param[in] scr if \c k ==-1 and need this to be reentrant, must provide ptr to the per-thread \c GaussianGmEvalScratch<Real,-1> object;
+       * @param[in] scr if \c k ==-1 and need this to be reentrant, must provide ptr to
+       *                the per-thread \c libint2::detail::CoreEvalScratch<GaussianGmEval<Real,-1>> object;
        *                no need to specify \c scr otherwise
        */
       template <typename AnyReal>
@@ -1443,7 +1447,7 @@ namespace libint2 {
         const double oo_sqrt_rho = 1.0/sqrt_rho;
         if (k == -1) {
           void* _scr = (scr == 0) ? this : scr;
-          GaussianGmEvalScratch<Real, -1>& scratch = *(reinterpret_cast<GaussianGmEvalScratch<Real, -1>*>(_scr));
+          auto& scratch = *(reinterpret_cast<detail::CoreEvalScratch<GaussianGmEval<Real, -1>>*>(_scr));
           for(int i=1; i<=mmax; i++) {
             scratch.r_i[i] = scratch.r_i[i-1] * rho;
           }
@@ -1470,7 +1474,7 @@ namespace libint2 {
 
           if (k == -1) {
             void* _scr = (scr == 0) ? this : scr;
-            GaussianGmEvalScratch<Real, -1>& scratch = *(reinterpret_cast<GaussianGmEvalScratch<Real, -1>*>(_scr));
+            auto& scratch = *(reinterpret_cast<detail::CoreEvalScratch<GaussianGmEval<Real, -1>>*>(_scr));
 
             const double rorgT = rorg * T;
             fm_eval_->eval(&scratch.Fm_[0], rorgT, mmax);
@@ -1527,7 +1531,7 @@ namespace libint2 {
     private:
       int mmax_;
       Real precision_; //< absolute precision
-      FmEval_Taylor<Real>* fm_eval_;
+      std::shared_ptr<FmEval_Taylor<Real>> fm_eval_;
 
       ExpensiveNumbers<Real> numbers_;
   };

diff --git a/include/libint2/engine.h b/include/libint2/engine.h
@@ -36,6 +36,7 @@
 #include <libint2/timer.h>
 #include <libint2/solidharmonics.h>
 #include <libint2/any.h>
+#include <libint2/util/compressed_pair.h>
 
 #include <libint2/boost/preprocessor.hpp>
 
@@ -895,7 +896,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
       typedef typename libint2::TwoBodyEngineTraits<Kernel>::oper_params_type oper_params_type;
 
       /// creates a default (unusable) TwoBodyEngine
-      TwoBodyEngine() : primdata_(), lmax_(-1), core_eval_(0) {
+      TwoBodyEngine() : primdata_(), lmax_(-1), core_eval_pack_() {
         set_precision(std::numeric_limits<real_t>::epsilon());
       }
 
@@ -924,7 +925,10 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
         primdata_(max_nprim * max_nprim * max_nprim * max_nprim),
         spbra_(max_nprim), spket_(max_nprim),
         lmax_(max_l), deriv_order_(deriv_order),
-        core_eval_(core_eval_type::instance(4*lmax_ + deriv_order, std::min(std::numeric_limits<real_t>::epsilon(),precision)))
+        core_eval_pack_(core_eval_type::instance(4*lmax_ + deriv_order,
+                                                 std::numeric_limits<real_t>::epsilon()),
+                        core_eval_scratch_type(4*lmax_ + deriv_order)
+                       )
       {
         set_precision(precision);
         initialize();
@@ -938,7 +942,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
         spbra_(std::move(other.spbra_)), spket_(std::move(other.spket_)),
         lmax_(other.lmax_), deriv_order_(other.deriv_order_),
         precision_(other.precision_), ln_precision_(other.ln_precision_),
-        core_eval_(std::move(other.core_eval_)),
+        core_eval_pack_(std::move(other.core_eval_pack_)),
         core_ints_params_(std::move(other.core_ints_params_)),
         scratch_(std::move(other.scratch_)) {
       }
@@ -949,7 +953,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
         spbra_(other.spbra_), spket_(other.spket_),
         lmax_(other.lmax_), deriv_order_(other.deriv_order_),
         precision_(other.precision_), ln_precision_(other.ln_precision_),
-        core_eval_(other.core_eval_), core_ints_params_(other.core_ints_params_)
+        core_eval_pack_(other.core_eval_pack_), core_ints_params_(other.core_ints_params_)
       {
         initialize();
       }
@@ -966,7 +970,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
         deriv_order_ = other.deriv_order_;
         precision_ = other.precision_;
         ln_precision_ = other.ln_precision_;
-        core_eval_ = std::move(other.core_eval_);
+        core_eval_pack_ = std::move(other.core_eval_pack_);
         core_ints_params_ = std::move(other.core_ints_params_);
         scratch_ = std::move(other.scratch_);
         return *this;
@@ -980,7 +984,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
         deriv_order_ = other.deriv_order_;
         precision_ = other.precision_;
         ln_precision_ = other.ln_precision_;
-        core_eval_ = other.core_eval_;
+        core_eval_pack_ = other.core_eval_pack_;
         core_ints_params_ = other.core_ints_params_;
         initialize();
         return *this;
@@ -1353,7 +1357,11 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
       real_t ln_precision_;
 
       typedef typename libint2::TwoBodyEngineTraits<Kernel>::core_eval_type core_eval_type;
-      std::shared_ptr<core_eval_type> core_eval_;
+      typedef typename libint2::detail::CoreEvalScratch<core_eval_type> core_eval_scratch_type;
+      typedef libint2::detail::compressed_pair<std::shared_ptr<core_eval_type>,
+                                               core_eval_scratch_type>
+              core_eval_pack_type;
+      core_eval_pack_type core_eval_pack_;
 
       typedef oper_params_type core_ints_params_type; // currently core ints params are always same type as operator params
       core_ints_params_type core_ints_params_;
@@ -1432,7 +1440,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
                               int mmax,
                               real_t T,
                               real_t) {
-          engine->core_eval_->eval(Fm, T, mmax);
+          engine->core_eval_pack_.first()->eval(Fm, T, mmax);
         }
     };
 
@@ -1443,7 +1451,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
                               int mmax,
                               real_t T,
                               real_t rho) {
-          engine->core_eval_->eval(Gm, rho, T, mmax, engine->core_ints_params_);
+          engine->core_eval_pack_.first()->eval(Gm, rho, T, mmax, engine->core_ints_params_, &engine->core_eval_pack_.second());
         }
     };
     template <>
@@ -1453,7 +1461,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
                               int mmax,
                               real_t T,
                               real_t rho) {
-          engine->core_eval_->eval(Gm, rho, T, mmax, engine->core_ints_params_);
+          engine->core_eval_pack_.first()->eval(Gm, rho, T, mmax, engine->core_ints_params_);
         }
     };
     template <>
@@ -1463,7 +1471,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
                               int mmax,
                               real_t T,
                               real_t rho) {
-          engine->core_eval_->eval(Gm, rho, T, mmax, engine->core_ints_params_);
+          engine->core_eval_pack_.first()->eval(Gm, rho, T, mmax, engine->core_ints_params_);
         }
     };
     template <>
@@ -1473,7 +1481,7 @@ BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODYENGINE_MCR5, _, BOOST_PP_ONEBODY_OPER
                               int mmax,
                               real_t T,
                               real_t rho) {
-          engine->core_eval_->eval(Gm, rho, T, mmax);
+          engine->core_eval_pack_.first()->eval(Gm, rho, T, mmax);
         }
     };
   }