Skip to content

Commit

Permalink
benchmark: estimate good number of iterations also for D_psi and give…
Browse files Browse the repository at this point in the history
… nicer output
  • Loading branch information
kostrzewa committed Mar 16, 2016
1 parent b3dcdf4 commit 86da1a2
Showing 1 changed file with 46 additions and 26 deletions.
72 changes: 46 additions & 26 deletions benchmark.c
Expand Up @@ -275,6 +275,7 @@ int main(int argc,char *argv[])
}
}
dt = gettime()-t1;
// division by g_nproc because we will average over processes
j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef MPI
MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
Expand Down Expand Up @@ -378,56 +379,75 @@ int main(int argc,char *argv[])
else {
/* the non even/odd case now */
/*initialize the pseudo-fermion fields*/
j_max=1;
j_max=128;
sdt=0.;
for (k=0;k<k_max;k++) {
random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
}

while(sdt < 3.) {
/* estimate a reasonable number of applications to get a reliable measurement */
#ifdef MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
MPI_Barrier(MPI_COMM_WORLD);
#endif
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
t2 = gettime();
dt=t2-t1;
}
t2 = gettime();
dt=t2-t1;
// division by g_nproc because we will average over processes using MPI_SUM
j = (int)(ceil(j_max*31.0/dt/g_nproc));
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(&j,&j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
j_max = j;
#endif
qdt=dt*dt;

/* perform the actual measurement */
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
MPI_Barrier(MPI_COMM_WORLD);
#endif
sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
j_max*=2;
t1 = gettime();
for (j=0;j<j_max;j++) {
for (k=0;k<k_max;k++) {
D_psi(g_spinor_field[k+k_max], g_spinor_field[k]);
antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0);
}
}
j_max=j_max/2;
t2 = gettime();
dt=t2-t1;
#ifdef MPI
MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sdt = dt;
#endif
qdt=dt*dt;
#ifdef MPI
MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
sqdt = qdt;
#endif
sdt=sdt/((double)g_nproc);
sqdt=sqrt(sqdt/g_nproc-sdt*sdt);
dts=dt;
sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME)));
sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME)));

if(g_proc_id==0) {
printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max);
printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3);
printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n\n", sdt, sqdt, j_max);
printf(" %12d Mflops(total) %8d Mflops(process)", (int)(1680.0f*g_nproc/sdt),(int)(1680.0f/sdt));
#ifdef OMP
printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt)));
printf(" %8d Mflops(thread)",(int)(1680.0f/(omp_num_threads*sdt)));
#endif
printf("\n");
printf(" [ %d bit arithmetic ]\n\n",(int)(sizeof(spinor)/3));
fflush(stdout);
}
}

#ifdef HAVE_LIBLEMON
if(g_proc_id==0) {
printf("# Performing parallel IO test ...\n");
Expand Down

0 comments on commit 86da1a2

Please sign in to comment.