Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Music #15

Merged
merged 4 commits into from
Oct 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions cpp_routines/cos.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* cos.h
* The basic idea is to exploit Pade polynomials.
* A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
* moshier@na-net.ornl.gov) as well as actual code.
* The Cephes library can be found here: http://www.netlib.org/cephes/
*
* Created on: Jun 23, 2012
* Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
*/

#ifndef COS_H_
#define COS_H_

#include "sincos.h"

namespace vdt{

// Cos double precision --------------------------------------------------------

/// Double precision cosine: just call sincos.
inline double fast_cos(double x){double s,c;fast_sincos(x,s,c);return c;}

//------------------------------------------------------------------------------

inline float fast_cosf(float x){float s,c;fast_sincosf(x,s,c);return c;}

//------------------------------------------------------------------------------
void cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void fast_cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
void fast_cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);

} //vdt namespace

#endif /* COS_H_ */
162 changes: 162 additions & 0 deletions cpp_routines/exp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* exp.h
* The basic idea is to exploit Pade polynomials.
* A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
* moshier@na-net.ornl.gov) as well as actual code.
* The Cephes library can be found here: http://www.netlib.org/cephes/
*
* Created on: Jun 23, 2012
* Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
*/

/*
* VDT is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser Public License for more details.
*
* You should have received a copy of the GNU Lesser Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef _VDT_EXP_
#define _VDT_EXP_

#include "vdtcore_common.h"
#include <limits>

namespace vdt{

namespace details{

const double EXP_LIMIT = 708;

const double PX1exp = 1.26177193074810590878E-4;
const double PX2exp = 3.02994407707441961300E-2;
const double PX3exp = 9.99999999999999999910E-1;
const double QX1exp = 3.00198505138664455042E-6;
const double QX2exp = 2.52448340349684104192E-3;
const double QX3exp = 2.27265548208155028766E-1;
const double QX4exp = 2.00000000000000000009E0;

const double LOG2E = 1.4426950408889634073599; // 1/log(2)

const float MAXLOGF = 88.72283905206835f;
const float MINLOGF = -88.f;

const float C1F = 0.693359375f;
const float C2F = -2.12194440e-4f;

const float PX1expf = 1.9875691500E-4f;
const float PX2expf =1.3981999507E-3f;
const float PX3expf =8.3334519073E-3f;
const float PX4expf =4.1665795894E-2f;
const float PX5expf =1.6666665459E-1f;
const float PX6expf =5.0000001201E-1f;

const float LOG2EF = 1.44269504088896341f;

}

// Exp double precision --------------------------------------------------------


/// Exponential Function double precision
inline double fast_exp(double initial_x){

double x = initial_x;
double px=details::fpfloor(details::LOG2E * x +0.5);

const int32_t n = int32_t(px);

x -= px * 6.93145751953125E-1;
x -= px * 1.42860682030941723212E-6;

const double xx = x * x;

// px = x * P(x**2).
px = details::PX1exp;
px *= xx;
px += details::PX2exp;
px *= xx;
px += details::PX3exp;
px *= x;

// Evaluate Q(x**2).
double qx = details::QX1exp;
qx *= xx;
qx += details::QX2exp;
qx *= xx;
qx += details::QX3exp;
qx *= xx;
qx += details::QX4exp;

// e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
x = px / (qx - px);
x = 1.0 + 2.0 * x;

// Build 2^n in double.
x *= details::uint642dp(( ((uint64_t)n) +1023)<<52);

if (initial_x > details::EXP_LIMIT)
x = std::numeric_limits<double>::infinity();
if (initial_x < -details::EXP_LIMIT)
x = 0.;

return x;

}

// Exp single precision --------------------------------------------------------

/// Exponential Function single precision
inline float fast_expf(float initial_x) {

float x = initial_x;

float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */

x -= z * details::C1F;
x -= z * details::C2F;
const int32_t n = int32_t ( z );

const float x2 = x * x;

z = x*details::PX1expf;
z += details::PX2expf;
z *= x;
z += details::PX3expf;
z *= x;
z += details::PX4expf;
z *= x;
z += details::PX5expf;
z *= x;
z += details::PX6expf;
z *= x2;
z += x + 1.0f;

/* multiply by power of 2 */
z *= details::uint322sp((n+0x7f)<<23);

if (initial_x > details::MAXLOGF) z=std::numeric_limits<float>::infinity();
if (initial_x < details::MINLOGF) z=0.f;

return z;

}

//------------------------------------------------------------------------------

void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);

} // end namespace vdt

#endif
81 changes: 58 additions & 23 deletions cpp_routines/music_track.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,24 @@ submit itself to any jurisdiction.
Project website: http://blond.web.cern.ch/
*/

// Optimised C++ routine that calculates the kicks
// Author: Danilo Quartullo, Helga Timko, Alexandre Lasheen
// Optimised C++ routine that calculates MuSiC trakc method
// Author: Danilo Quartullo, Konstantinos Iliakis

// #include "sin.h"
#include "sin.h"
#include "cos.h"
#include "exp.h"

#ifdef PARALLEL
#include <parallel/algorithm>
#else
#include <algorithm>
#endif

#include <cmath>
#include <ctime>
#include <chrono>
#include <iostream>

// using namespace vdt;
using namespace std;
using namespace vdt;

struct particle {
double de;
Expand All @@ -29,6 +36,14 @@ struct particle {
}
};

struct Comparator {
const double *dt;
Comparator(const double *_dt) : dt(_dt) {}
bool operator()(const int i1, const int i2) const
{
return dt[i1] < dt[i2];
}
};

extern "C" void music_track(double *__restrict__ beam_dt,
double *__restrict__ beam_dE,
Expand All @@ -42,35 +57,52 @@ extern "C" void music_track(double *__restrict__ beam_dt,
const double coeff3,
const double coeff4)
{
clock_t start;
double duration;

start = std::clock();


std::chrono::time_point<std::chrono::high_resolution_clock>start;
std::chrono::duration<double> duration(0.0);
start = std::chrono::system_clock::now();

// sort(&beam_dt[0], &beam_dt[n_macroparticles]);
vector<particle> particles; particles.reserve(n_macroparticles);
std::vector<particle> particles; particles.reserve(n_macroparticles);
for (int i = 0; i < n_macroparticles; i++)
particles.push_back({beam_dE[i], beam_dt[i]});
sort(particles.begin(), particles.end());
#ifdef PARALLEL
__gnu_parallel::sort(particles.begin(), particles.end());
#else
std::sort(particles.begin(), particles.end());
#endif
for (int i = 0; i < n_macroparticles; i++) {
beam_dE[i] = particles[i].de;
beam_dt[i] = particles[i].dt;
}

duration = ( clock() - start ) / (double) CLOCKS_PER_SEC;
// NOTE make sure that this is actually sorting beam_dE with regards to
// beam_dt
// #ifdef PARALLEL
// // std::cout << "parallel code\n";
// __gnu_parallel::sort(&beam_dE[0], &beam_dE[n_macroparticles],
// Comparator(beam_dt));
// __gnu_parallel::sort(&beam_dt[0], &beam_dt[n_macroparticles]);
// #else
// // std::cout << "serial code\n";
// std::sort(&beam_dE[0], &beam_dE[n_macroparticles],
// Comparator(beam_dt));
// std::sort(&beam_dt[0], &beam_dt[n_macroparticles]);
// #endif


duration = std::chrono::system_clock::now() - start;
std::cout << "sorting time: " << duration.count() << '\n';

start = std::chrono::system_clock::now();

cout<<"printf: "<< duration <<'\n';
beam_dE[0] += induced_voltage[0];
double input_first_component = 1;
double input_second_component = 0;

for (int i = 0; i < n_macroparticles - 1; i++) {
const double time_difference = beam_dt[i + 1] - beam_dt[i];
const double exp_term = exp(-alpha * time_difference);
const double cos_term = cos(omega_bar * time_difference);
const double sin_term = sin(omega_bar * time_difference);
const double exp_term = fast_exp(-alpha * time_difference);
const double cos_term = fast_cos(omega_bar * time_difference);
const double sin_term = fast_sin(omega_bar * time_difference);

const double product_first_component =
exp_term * ((cos_term + coeff1 * sin_term)
* input_first_component + coeff2 * sin_term
Expand All @@ -80,15 +112,18 @@ extern "C" void music_track(double *__restrict__ beam_dt,
exp_term * (coeff3 * sin_term * input_first_component
+ (cos_term + coeff4 * sin_term)
* input_second_component);

induced_voltage[i + 1] = cnst * (0.5 + product_first_component);
beam_dE[i + 1] += induced_voltage[i + 1];
input_first_component = product_first_component + 1;
input_second_component = product_second_component;
}





duration = std::chrono::system_clock::now() - start;
std::cout << "tracking time: " << duration.count() << '\n';


}

3 changes: 3 additions & 0 deletions impedances/music.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from scipy.constants import e
import ctypes
from setup_cpp import libblond
import time


class Music(object):
Expand All @@ -49,6 +50,7 @@ def __init__(self, Beam, resonator, n_macroparticles, n_particles):
self.coeff4 = self.alpha/self.omega_bar

def track(self):
start = time.time()

libblond.music_track(self.beam.dt.ctypes.data_as(ctypes.c_void_p),
self.beam.dE.ctypes.data_as(ctypes.c_void_p),
Expand All @@ -61,6 +63,7 @@ def track(self):
ctypes.c_double(self.coeff2),
ctypes.c_double(self.coeff3),
ctypes.c_double(self.coeff4))
print "total music_track time with .time(): ", time.time() - start

# indices_sorted = np.argsort(self.beam.dt)
# self.beam.dt = self.beam.dt[indices_sorted]
Expand Down
2 changes: 1 addition & 1 deletion setup_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
flags = '-Ofast -std=c++11'
list_cpp_files += ' cpp_routines/histogram.cpp'
elif parallel == True:
flags = '-Ofast -std=c++11 -fopenmp'
flags = '-Ofast -std=c++11 -fopenmp -DPARALLEL'
list_cpp_files += ' cpp_routines/histogram_par.cpp'

if boost_path != None:
Expand Down