Skip to content

Commit

Permalink
Merge pull request #312 from kostrzewa/benchmark.refactor
Browse files Browse the repository at this point in the history
benchmark: estimate appropriate number of iterations and give structured output
  • Loading branch information
urbach committed Mar 17, 2016
2 parents 9e46ed9 + 86da1a2 commit fc9a8ee
Showing 1 changed file with 102 additions and 57 deletions.
159 changes: 102 additions & 57 deletions benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,56 +254,82 @@ int main(int argc,char *argv[])
#endif

if(even_odd_flag) {
sdt=0.; sqdt=0.0;
/*initialize the pseudo-fermion fields*/
j_max=2048;
sdt=0.;
for (k = 0; k < k_max; k++) {
random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
}

while(sdt < 30.) {
j_max=512;
antioptaway=0.0;
/* compute approximately how many applications we need to do to get a reliable measurement */
#ifdef MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
antioptaway=0.0;
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]);
Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0);
}
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]);
Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0);
}
t2 = gettime();
dt = t2-t1;
}
dt = gettime()-t1;
// division by g_nproc because we will average over processes
j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
j_max = j;
#endif
qdt=dt*dt;



/* perform the actual benchmark */
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
MPI_Barrier(MPI_COMM_WORLD);
#endif
sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
j_max*=2;
t1 = gettime();
antioptaway=0.0;
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]);
Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]);
antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0);
}
}
j_max=j_max/2;
dt = gettime()-t1;
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
#endif

qdt=dt*dt;
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
#endif

sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);

dts=dt;
sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME)));
sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME)));

if(g_proc_id==0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/sdt),(int)sizeof(spinor)/3);
#ifdef MPI
printf("# Communication switched on: \n");
#endif
printf("\n%12d Mflops(total) %8d Mflops(process)", (int)(g_nproc*1608.0f/sdt),(int)(1608.0f/sdt));
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt)));
printf(" %8d Mflops(thread)",(int)(1608.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3));
fflush(stdout);
}

Expand All @@ -329,11 +355,11 @@ int main(int argc,char *argv[])
dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME)));
if(g_proc_id==0) {
printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway);
printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3);
printf("# Communication switched off: \n\n%12d Mflops(total) %8d Mflops(process)", (int)(g_nproc*1608.0f/dt),(int)(1608.0f/dt));
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt)));
printf(" %8d Mflops(thread)",(int)(1608.0f/(omp_num_threads*dt)));
#endif
printf("\n");
printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3));
fflush(stdout);
}
sdt=sdt/((double)k_max);
Expand All @@ -353,56 +379,75 @@ int main(int argc,char *argv[])
else {
/* the non even/odd case now */
/*initialize the pseudo-fermion fields*/
j_max=1;
j_max=128;
sdt=0.;
for (k=0;k<k_max;k++) {
random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
}

while(sdt < 3.) {
/* estimate a reasonable number of applications to get a reliable measurement */
#ifdef MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
t2 = gettime();
dt=t2-t1;
}
t2 = gettime();
dt=t2-t1;
// division by g_nproc because we will average over processes using MPI_SUM
j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
j_max = j;
#endif
qdt=dt*dt;

/* perform the actual measurement */
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
MPI_Barrier(MPI_COMM_WORLD);
#endif
sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
j_max*=2;
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
}
j_max=j_max/2;
t2 = gettime();
dt=t2-t1;
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
#endif
qdt=dt*dt;
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
#endif
sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
dts=dt;
sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME)));
sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME)));

if(g_proc_id==0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n\n", sdt, sqdt, j_max);
printf(" %12d Mflops(total) %8d Mflops(process)", (int)(1680.0f*g_nproc/sdt),(int)(1680.0f/sdt));
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt)));
printf(" %8d Mflops(thread)",(int)(1680.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3));
fflush(stdout);
}
}

#ifdef HAVE_LIBLEMON
if(g_proc_id==0) {
printf("# Performing parallel IO test ...\n");
Expand Down

0 comments on commit fc9a8ee

Please sign in to comment.