Skip to content

Commit

Permalink
inti
Browse files Browse the repository at this point in the history
  • Loading branch information
hannorein committed Jun 2, 2023
1 parent 52f4297 commit 885b019
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 72 deletions.
8 changes: 6 additions & 2 deletions examples/avx512_performance/problem.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ void run(int faster){
// Start integration
gettimeofday(&time_beginning,NULL);
reb_integrate(r, tmax);

gettimeofday(&time_end,NULL);

double e_final = reb_tools_energy(r);
Expand All @@ -101,6 +102,9 @@ void run(int faster){
}

int main(int argc, char* argv[]) {
if (argc<2) run(1);
run(0);
if (argc<2){
run(1);
}else{
run(0);
}
}
102 changes: 32 additions & 70 deletions src/integrator_whfast512.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,10 @@ static __m512d inline gravity_prefactor_avx512(
/*
Warning: assumes *particles points at the star.
*/
void reb_whfast512_interaction_step(struct reb_simulation * r, double dt){
// TODO Implement
}
void gravity_interaction_fast_avx512(
struct reb_particle * restrict particles // the pointer must be at the central object (i.e. &particles[0])
){
void reb_whfast512_interaction_step(struct reb_simulation * r, double dt){
struct reb_simulation_integrator_whfast512* const ri_whfast512 = &(r->ri_whfast512);
struct reb_particle_avx512* p512 = ri_whfast512->particles;

//seperating instructions into two halves:
// (1) inter-lane permutations
// (2) one swap with cross lane permutation
Expand All @@ -313,54 +311,30 @@ void gravity_interaction_fast_avx512(
// WARNING: assumes G == 1, softening == shiftx == shifty == shiftz == 0
// WARNING: requires 8 plantes and a central star

const __m512d x_planets = _mm512_set_pd(
particles[8].x, particles[7].x, particles[6].x, particles[5].x,
particles[4].x, particles[3].x, particles[2].x, particles[1].x
);
const __m512d y_planets = _mm512_set_pd(
particles[8].y, particles[7].y, particles[6].y, particles[5].y,
particles[4].y, particles[3].y, particles[2].y, particles[1].y
);
const __m512d z_planets = _mm512_set_pd(
particles[8].z, particles[7].z, particles[6].z, particles[5].z,
particles[4].z, particles[3].z, particles[2].z, particles[1].z
);

__m512d ax_planets = _mm512_set_pd(
particles[8].ax, particles[7].ax, particles[6].ax, particles[5].ax,
particles[4].ax, particles[3].ax, particles[2].ax, particles[1].ax
);
__m512d ay_planets = _mm512_set_pd(
particles[8].ay, particles[7].ay, particles[6].ay, particles[5].ay,
particles[4].ay, particles[3].ay, particles[2].ay, particles[1].ay
);
__m512d az_planets = _mm512_set_pd(
particles[8].az, particles[7].az, particles[6].az, particles[5].az,
particles[4].az, particles[3].az, particles[2].az, particles[1].az
);
p512->ax = _mm512_set_pd(0.,0.,0.,0.,0.,0.,0.,0.);
p512->ay = _mm512_set_pd(0.,0.,0.,0.,0.,0.,0.,0.);
p512->az = _mm512_set_pd(0.,0.,0.,0.,0.,0.,0.,0.);

__m512d m_j = _mm512_set_pd(
particles[8].m, particles[7].m, particles[6].m, particles[5].m,
particles[4].m, particles[3].m, particles[2].m, particles[1].m
);
__m512d x_j = x_planets;
__m512d y_j = y_planets;
__m512d z_j = z_planets;
__m512d x_j = p512->x;
__m512d y_j = p512->y;
__m512d z_j = p512->z;

__m512d m_j = p512->m;

// calculating the force by permutation
for (unsigned int j = 0; j < 3; j++) {
x_j = _mm512_permutex_pd(x_j, _MM_PERM_ADCB);
y_j = _mm512_permutex_pd(y_j, _MM_PERM_ADCB);
z_j = _mm512_permutex_pd(z_j, _MM_PERM_ADCB);
m_j = _mm512_permutex_pd(m_j, _MM_PERM_ADCB);
const __m512d dx_j = _mm512_sub_pd(x_planets, x_j);
const __m512d dy_j = _mm512_sub_pd(y_planets, y_j);
const __m512d dz_j = _mm512_sub_pd(z_planets, z_j);
const __m512d dx_j = _mm512_sub_pd(p512->x, x_j);
const __m512d dy_j = _mm512_sub_pd(p512->y, y_j);
const __m512d dz_j = _mm512_sub_pd(p512->z, z_j);
const __m512d prefact = gravity_prefactor_avx512(m_j, dx_j, dy_j, dz_j);

ax_planets = _mm512_sub_pd(ax_planets, _mm512_mul_pd(prefact, dx_j));
ay_planets = _mm512_sub_pd(ay_planets, _mm512_mul_pd(prefact, dy_j));
az_planets = _mm512_sub_pd(az_planets, _mm512_mul_pd(prefact, dz_j));
p512->ax = _mm512_sub_pd(p512->ax, _mm512_mul_pd(prefact, dx_j));
p512->ay = _mm512_sub_pd(p512->ay, _mm512_mul_pd(prefact, dy_j));
p512->az = _mm512_sub_pd(p512->az, _mm512_mul_pd(prefact, dz_j));
}
// swapping upper with lowe halves with cross
// 256 lane permutation instruction.
Expand All @@ -373,42 +347,28 @@ void gravity_interaction_fast_avx512(

// continuing the calculations
// first iteration does not require permutation after initialization
const __m512d dx_j = _mm512_sub_pd(x_planets, x_j);
const __m512d dy_j = _mm512_sub_pd(y_planets, y_j);
const __m512d dz_j = _mm512_sub_pd(z_planets, z_j);
const __m512d dx_j = _mm512_sub_pd(p512->x, x_j);
const __m512d dy_j = _mm512_sub_pd(p512->y, y_j);
const __m512d dz_j = _mm512_sub_pd(p512->z, z_j);
const __m512d prefact = gravity_prefactor_avx512(m_j, dx_j, dy_j, dz_j);

ax_planets = _mm512_sub_pd(ax_planets, _mm512_mul_pd(prefact, dx_j));
ay_planets = _mm512_sub_pd(ay_planets, _mm512_mul_pd(prefact, dy_j));
az_planets = _mm512_sub_pd(az_planets, _mm512_mul_pd(prefact, dz_j));
p512->ax = _mm512_sub_pd(p512->ax, _mm512_mul_pd(prefact, dx_j));
p512->ay = _mm512_sub_pd(p512->ay, _mm512_mul_pd(prefact, dy_j));
p512->az = _mm512_sub_pd(p512->az, _mm512_mul_pd(prefact, dz_j));
// calculating the force by permutation
for (unsigned int j = 0; j < 3; j++) {
x_j = _mm512_permutex_pd(x_j, _MM_PERM_ADCB);
y_j = _mm512_permutex_pd(y_j, _MM_PERM_ADCB);
z_j = _mm512_permutex_pd(z_j, _MM_PERM_ADCB);
m_j = _mm512_permutex_pd(m_j, _MM_PERM_ADCB);
const __m512d dx_j = _mm512_sub_pd(x_planets, x_j);
const __m512d dy_j = _mm512_sub_pd(y_planets, y_j);
const __m512d dz_j = _mm512_sub_pd(z_planets, z_j);
const __m512d dx_j = _mm512_sub_pd(p512->x, x_j);
const __m512d dy_j = _mm512_sub_pd(p512->y, y_j);
const __m512d dz_j = _mm512_sub_pd(p512->z, z_j);
const __m512d prefact = gravity_prefactor_avx512(m_j, dx_j, dy_j, dz_j);

ax_planets = _mm512_sub_pd(ax_planets, _mm512_mul_pd(prefact, dx_j));
ay_planets = _mm512_sub_pd(ay_planets, _mm512_mul_pd(prefact, dy_j));
az_planets = _mm512_sub_pd(az_planets, _mm512_mul_pd(prefact, dz_j));
}


double _nax[8];
double _nay[8];
double _naz[8];
_mm512_store_pd(&_nax[0], ax_planets);
_mm512_store_pd(&_nay[0], ay_planets);
_mm512_store_pd(&_naz[0], az_planets);

for (unsigned int i=1;i<9;i++){
particles[i].ax = _nax[i-1];
particles[i].ay = _nay[i-1];
particles[i].az = _naz[i-1];
p512->ax = _mm512_sub_pd(p512->ax, _mm512_mul_pd(prefact, dx_j));
p512->ay = _mm512_sub_pd(p512->ay, _mm512_mul_pd(prefact, dy_j));
p512->az = _mm512_sub_pd(p512->az, _mm512_mul_pd(prefact, dz_j));
}
}

Expand Down Expand Up @@ -652,5 +612,7 @@ void reb_integrator_whfast512_reset(struct reb_simulation* const r){
free(ri_whfast512->particles);
free(ri_whfast512->p_jh);
}
ri_whfast512->particles = NULL;
ri_whfast512->p_jh = NULL;
ri_whfast512->allocated_N = 0;
}

0 comments on commit 885b019

Please sign in to comment.