blond-admin · dquartul · Oct 21, 2016 · Oct 20, 2016 · Oct 20, 2016 · Oct 20, 2016
diff --git a/cpp_routines/cos.h b/cpp_routines/cos.h
@@ -0,0 +1,36 @@
+/*
+ * cos.h
+ * The basic idea is to exploit Pade polynomials.
+ * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
+ * moshier@na-net.ornl.gov) as well as actual code. 
+ * The Cephes library can be found here:  http://www.netlib.org/cephes/
+ * 
+ *  Created on: Jun 23, 2012
+ *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
+ */
+
+#ifndef COS_H_
+#define COS_H_
+
+#include "sincos.h"
+
+namespace vdt{
+
+// Cos double precision --------------------------------------------------------
+
+/// Double precision cosine: just call sincos.
+inline double fast_cos(double x){double s,c;fast_sincos(x,s,c);return c;}
+
+//------------------------------------------------------------------------------
+
+inline float fast_cosf(float x){float s,c;fast_sincosf(x,s,c);return c;}
+
+//------------------------------------------------------------------------------
+void cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
+void fast_cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
+void cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
+void fast_cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
+
+} //vdt namespace
+
+#endif /* COS_H_ */
diff --git a/cpp_routines/exp.h b/cpp_routines/exp.h
@@ -0,0 +1,162 @@
+/*
+ * exp.h
+ * The basic idea is to exploit Pade polynomials.
+ * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
+ * moshier@na-net.ornl.gov) as well as actual code. 
+ * The Cephes library can be found here:  http://www.netlib.org/cephes/
+ * 
+ *  Created on: Jun 23, 2012
+ *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
+ */
+
+/* 
+ * VDT is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VDT_EXP_
+#define _VDT_EXP_
+
+#include "vdtcore_common.h"
+#include <limits>
+
+namespace vdt{
+
+namespace details{
+
+  const double EXP_LIMIT = 708;
+
+  const double PX1exp = 1.26177193074810590878E-4;
+  const double PX2exp = 3.02994407707441961300E-2;
+  const double PX3exp = 9.99999999999999999910E-1;
+  const double QX1exp = 3.00198505138664455042E-6;
+  const double QX2exp = 2.52448340349684104192E-3;
+  const double QX3exp = 2.27265548208155028766E-1;
+  const double QX4exp = 2.00000000000000000009E0;
+
+  const double LOG2E = 1.4426950408889634073599; // 1/log(2)
+
+  const float MAXLOGF = 88.72283905206835f;
+  const float MINLOGF = -88.f;
+
+  const float C1F =   0.693359375f;
+  const float C2F =  -2.12194440e-4f;
+
+  const float PX1expf = 1.9875691500E-4f;
+  const float PX2expf =1.3981999507E-3f;
+  const float PX3expf =8.3334519073E-3f;
+  const float PX4expf =4.1665795894E-2f;
+  const float PX5expf =1.6666665459E-1f;
+  const float PX6expf =5.0000001201E-1f;
+
+  const float LOG2EF = 1.44269504088896341f;
+
+}
+
+// Exp double precision --------------------------------------------------------
+
+
+/// Exponential Function double precision
+inline double fast_exp(double initial_x){
+
+    double x = initial_x;
+    double px=details::fpfloor(details::LOG2E * x +0.5);
+
+    const int32_t n = int32_t(px);
+
+    x -= px * 6.93145751953125E-1;
+    x -= px * 1.42860682030941723212E-6;
+
+    const double xx = x * x;
+
+    // px = x * P(x**2).
+    px = details::PX1exp;
+    px *= xx;
+    px += details::PX2exp;
+    px *= xx;
+    px += details::PX3exp;
+    px *= x;
+
+    // Evaluate Q(x**2).
+    double qx = details::QX1exp;
+    qx *= xx;
+    qx += details::QX2exp;
+    qx *= xx;
+    qx += details::QX3exp;
+    qx *= xx;
+    qx += details::QX4exp;
+
+    // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
+    x = px / (qx - px);
+    x = 1.0 + 2.0 * x;
+
+    // Build 2^n in double.
+    x *= details::uint642dp(( ((uint64_t)n) +1023)<<52);
+
+    if (initial_x > details::EXP_LIMIT)
+            x = std::numeric_limits<double>::infinity();
+    if (initial_x < -details::EXP_LIMIT)
+            x = 0.;
+
+    return x;
+
+}
+
+// Exp single precision --------------------------------------------------------
+
+/// Exponential Function single precision
+inline float fast_expf(float initial_x) {
+
+    float x = initial_x;
+
+    float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */
+
+    x -= z * details::C1F;
+    x -= z * details::C2F;
+    const int32_t n = int32_t ( z );
+
+    const float x2 = x * x;
+
+    z = x*details::PX1expf;
+    z += details::PX2expf;
+    z *= x;
+    z += details::PX3expf;
+    z *= x;
+    z += details::PX4expf;
+    z *= x;
+    z += details::PX5expf;
+    z *= x;
+    z += details::PX6expf;
+    z *= x2;
+    z += x + 1.0f;
+
+    /* multiply by power of 2 */
+    z *=  details::uint322sp((n+0x7f)<<23);
+
+    if (initial_x > details::MAXLOGF) z=std::numeric_limits<float>::infinity();
+    if (initial_x < details::MINLOGF) z=0.f;
+
+    return z;
+
+  }
+
+//------------------------------------------------------------------------------
+
+void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
+void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
+void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
+void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
+
+} // end namespace vdt
+
+#endif
diff --git a/cpp_routines/music_track.cpp b/cpp_routines/music_track.cpp
@@ -8,17 +8,24 @@ submit itself to any jurisdiction.
 Project website: http://blond.web.cern.ch/
 */
 
-// Optimised C++ routine that calculates the kicks
-// Author: Danilo Quartullo, Helga Timko, Alexandre Lasheen
+// Optimised C++ routine that calculates MuSiC trakc method
+// Author: Danilo Quartullo, Konstantinos Iliakis
 
-// #include "sin.h"
+#include "sin.h"
+#include "cos.h"
+#include "exp.h"
+
+#ifdef PARALLEL
+#include <parallel/algorithm>
+#else
 #include <algorithm>
+#endif
+
 #include <cmath>
-#include <ctime>
+#include <chrono>
 #include <iostream>
 
-// using namespace vdt;
-using namespace std;
+using namespace vdt;
 
 struct particle {
     double de;
@@ -29,6 +36,14 @@ struct particle {
     }
 };
 
+struct Comparator {
+    const double *dt;
+    Comparator(const double *_dt) : dt(_dt) {}
+    bool operator()(const int i1, const int i2) const
+    {
+        return dt[i1] < dt[i2];
+    }
+};
 
 extern "C" void music_track(double *__restrict__ beam_dt,
                             double *__restrict__ beam_dE,
@@ -42,35 +57,52 @@ extern "C" void music_track(double *__restrict__ beam_dt,
                             const double coeff3,
                             const double coeff4)
 {
-    clock_t start;
-    double duration;
-
-    start = std::clock();
 
-
+    std::chrono::time_point<std::chrono::high_resolution_clock>start;
+    std::chrono::duration<double> duration(0.0);
+    start = std::chrono::system_clock::now();
 
-//     sort(&beam_dt[0], &beam_dt[n_macroparticles]);
-    vector<particle> particles; particles.reserve(n_macroparticles);
+    std::vector<particle> particles; particles.reserve(n_macroparticles);
     for (int i = 0; i < n_macroparticles; i++)
         particles.push_back({beam_dE[i], beam_dt[i]});
-    sort(particles.begin(), particles.end());
+#ifdef PARALLEL
+    __gnu_parallel::sort(particles.begin(), particles.end());
+#else
+    std::sort(particles.begin(), particles.end());
+#endif
     for (int i = 0; i < n_macroparticles; i++) {
         beam_dE[i] = particles[i].de;
         beam_dt[i] = particles[i].dt;
     }
-
-    duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
+    // NOTE make sure that this is actually sorting beam_dE with regards to
+    // beam_dt
+// #ifdef PARALLEL
+//     // std::cout << "parallel code\n";
+//     __gnu_parallel::sort(&beam_dE[0], &beam_dE[n_macroparticles],
+//                          Comparator(beam_dt));
+//     __gnu_parallel::sort(&beam_dt[0], &beam_dt[n_macroparticles]);
+// #else
+//     // std::cout << "serial code\n";
+//     std::sort(&beam_dE[0], &beam_dE[n_macroparticles],
+//               Comparator(beam_dt));
+//     std::sort(&beam_dt[0], &beam_dt[n_macroparticles]);
+// #endif
+
+
+    duration = std::chrono::system_clock::now() - start;
+    std::cout << "sorting time: " << duration.count() << '\n';
+
+    start = std::chrono::system_clock::now();
 
-    cout<<"printf: "<< duration <<'\n';
     beam_dE[0] += induced_voltage[0];
     double input_first_component = 1;
     double input_second_component = 0;
-
     for (int i = 0; i < n_macroparticles - 1; i++) {
         const double time_difference = beam_dt[i + 1] - beam_dt[i];
-        const double exp_term = exp(-alpha * time_difference);
-        const double cos_term = cos(omega_bar * time_difference);
-        const double sin_term = sin(omega_bar * time_difference);
+        const double exp_term = fast_exp(-alpha * time_difference);
+        const double cos_term = fast_cos(omega_bar * time_difference);
+        const double sin_term = fast_sin(omega_bar * time_difference);
+
         const double product_first_component =
             exp_term * ((cos_term + coeff1 * sin_term)
                         * input_first_component + coeff2 * sin_term
@@ -80,15 +112,18 @@ extern "C" void music_track(double *__restrict__ beam_dt,
             exp_term * (coeff3 * sin_term * input_first_component
                         + (cos_term + coeff4 * sin_term)
                         * input_second_component);
+
         induced_voltage[i + 1] = cnst * (0.5 + product_first_component);
         beam_dE[i + 1] += induced_voltage[i + 1];
         input_first_component = product_first_component + 1;
         input_second_component = product_second_component;
     }
-
-
 
 
 
+    duration = std::chrono::system_clock::now() - start;
+    std::cout << "tracking time: " << duration.count() << '\n';
+
+
 }
 
diff --git a/impedances/music.py b/impedances/music.py
@@ -23,6 +23,7 @@
 from scipy.constants import e
 import ctypes
 from setup_cpp import libblond
+import time
 
 
 class Music(object):
@@ -49,6 +50,7 @@ def __init__(self, Beam, resonator, n_macroparticles, n_particles):
         self.coeff4 = self.alpha/self.omega_bar
 
     def track(self):
+        start = time.time()
 
         libblond.music_track(self.beam.dt.ctypes.data_as(ctypes.c_void_p),
                              self.beam.dE.ctypes.data_as(ctypes.c_void_p),
@@ -61,6 +63,7 @@ def track(self):
                              ctypes.c_double(self.coeff2),
                              ctypes.c_double(self.coeff3),
                              ctypes.c_double(self.coeff4))
+        print "total music_track time with .time(): ", time.time() - start
 
 #         indices_sorted = np.argsort(self.beam.dt)
 #         self.beam.dt = self.beam.dt[indices_sorted]

diff --git a/setup_cpp.py b/setup_cpp.py
@@ -51,7 +51,7 @@
     flags = '-Ofast -std=c++11'
     list_cpp_files += ' cpp_routines/histogram.cpp'
 elif parallel == True:
-    flags = '-Ofast -std=c++11 -fopenmp'
+    flags = '-Ofast -std=c++11 -fopenmp -DPARALLEL'
     list_cpp_files += ' cpp_routines/histogram_par.cpp'
 
 if boost_path != None: