Skip to content

Commit

Permalink
Add OpenMP parallelism to "harmonize" - each OpenMP thread works on a
Browse files Browse the repository at this point in the history
different polygon.
  • Loading branch information
abensonca committed May 5, 2014
1 parent 95ba155 commit 50a2cec
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 24 deletions.
20 changes: 10 additions & 10 deletions src/configure
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ cat <<EOF >> Makefile
# Gnu
CC = gcc
CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64
CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
F77 = gfortran
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
STATICFLAGS:= -static
#MAKE=gmake
Expand All @@ -115,10 +115,10 @@ cat <<EOF >> Makefile
# Gnu
CC = gcc
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
F77 = gfortran
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
#STATICFLAGS:= -static-libgfortran
STATICFLAGS:= -nodefaultlibs -lSystem -lgcc -lm -lgfortran_static
# static linking of gfortran library to compile for distribution.
Expand Down Expand Up @@ -147,10 +147,10 @@ cat <<EOF >> Makefile
# Gnu
CC = gcc
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 -fopenmp
F77 = gfortran
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c -D_FILE_OFFSET_BITS=64
FFLAGS:= -Wall -g -O3 -DGFORTRAN -ff2c -D_FILE_OFFSET_BITS=64 -fopenmp
#STATICFLAGS:= -static-libgfortran
STATICFLAGS:= -nodefaultlibs -lSystem -lgcc -lm -lgfortran_static
# static linking of gfortran library to compile for distribution.
Expand Down Expand Up @@ -205,7 +205,7 @@ cat <<EOF >> Makefile
# Gnu
CC = gcc
CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64
CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
## f2c
#F77 = fort77
Expand All @@ -215,7 +215,7 @@ CFLAGS = -g -O3 -Wall -DLINUX -DGCC $MFLAG -D_FILE_OFFSET_BITS=64
# gnu g77 (which assumes no SAVE)
F77 = g77
FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 $MFLAG -D_FILE_OFFSET_BITS=64
FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 $MFLAG -D_FILE_OFFSET_BITS=64 -fopenmp
#MAKE=gmake
Expand Down Expand Up @@ -250,11 +250,11 @@ cat <<EOF >> Makefile
# Gnu
CC = gcc
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64
CFLAGS = -g -O3 -Wall -DMACOSX -DGCC -D_FILE_OFFSET_BITS=64 -fopenmp
# gnu g77 (which assumes no SAVE)
F77 = g77
FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 -D_FILE_OFFSET_BITS=64
FFLAGS = -Wimplicit -fbounds-check -g -O3 -DG77 -D_FILE_OFFSET_BITS=64 -fopenmp
#MAKE=gmake
Expand Down
1 change: 1 addition & 0 deletions src/fframe.s.f
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ subroutine fframe(framei,azi,eli,framef,azf,elf)
c saved local variables
real*10 azg,elg,elp,l2z
save azg,elg,elp,l2z
!$omp threadprivate(azg,elg,elp,l2z)
c local (automatic) variables
integer iaz
real*10 date,dd,dec2k,dr,ra2k
Expand Down
2 changes: 2 additions & 0 deletions src/gspher.s.f
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ subroutine gspher(area,bound,vert,w,lmax1,im,nw,rp,cm,np,npc,ibv,
c number of non-intersecting circles bounding area
nbd0m=0
nbd0p=0
c number of multiple intersections
nmult=0
c error check on evaluation of vertex terms
ikchk=0._10
c area=sqrt(4pi)*monopole
Expand Down
1 change: 1 addition & 0 deletions src/gsphera.s.f
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ subroutine gsphera(area,bound,vert,w,lmax1,im,nw,ibv,
c saved variables
real*10 cl,cu,dth,sl,su
save cl,cu,dth,sl,su
!$omp threadprivate(cl,cu,dth,sl,su)
c local (automatic) variables
integer i,l,m,lm,lmax,mmax
real*10 azmx,cmph,d,dph,ph,smph,thmin,thmax
Expand Down
3 changes: 3 additions & 0 deletions src/gsubs.s.f
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,10 @@ integer function gsegij(rp,cm,np,npb,npc,i,rpi,scmi,cmi,tol,ni,
real*10 bik,cmik,cmk,d,psi,psim(2),dphc,si
c local variables to be saved
integer jl,ju
data jl /0/
data ju /0/
save jl,ju
!$omp threadprivate(jl,ju)
c *
c * Determine whether next segment of i circle
c * is an edge of the polygon.
Expand Down
61 changes: 47 additions & 14 deletions src/harmonize_polys.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "manglefn.h"
#include "pi.h"

Expand All @@ -24,20 +25,15 @@
*/
int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int lmax, harmonic w[/*NW*/])
{
int accelerate, i, ier, ip, ipoly, iq, ir, isrect, iw, naccelerate, ndone, ner, nrect;
long double azmin, azmax, elmin, elmax, azmn, azmx, elmn, elmx, tol;
/* work array contains harmonics of single polygon */
int accelerate, i, ier, ger, ip, ipoly, iq, ir, isrect, iw, naccelerate, ndone, ner, nrect;
long double azmin, azmax, elmin, elmax, azmn, azmx, elmn, elmx, tol;
/* work array contains harmonics of single polygon */
harmonic *dw;
/* work arrays to deal with possible acceleration */
int *iord, *ir_to_ip;
long double *elord;

/* work arrays */
dw = (harmonic *) malloc(sizeof(harmonic) * NW);
if (!dw) {
fprintf(stderr, "harmonize_polys: failed to allocate memory for %d harmonics\n", NW);
return(-1);
}
iord = (int *) malloc(sizeof(int) * npoly);
if (!iord) {
fprintf(stderr, "harmonize_polys: failed to allocate memory for %d ints\n", npoly);
Expand Down Expand Up @@ -85,14 +81,29 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l
ndone = 0;
naccelerate = 0;
ner = 0;
if (lmax >= LMAX_ADVICE) msg("doing polygon number (of %d):\n", npoly);
for (ip = 0; ip < npoly; ip++) {
ger = 0;
if (lmax >= LMAX_ADVICE) msg("doing polygon number (of %d):\n", npoly);
#pragma omp parallel private(dw,i,ip,iq,ir,iw,ier,ipoly,azmin,azmax,elmin,elmax,azmn,azmx,elmn,elmx,accelerate,tol)
{
/* work arrays */
dw = (harmonic *) malloc(sizeof(harmonic) * NW);
if (!dw) {
fprintf(stderr, "harmonize_polys: failed to allocate memory for %d harmonics\n", NW);
#pragma omp critical(globalError)
ger = -1;
}
#pragma omp barrier
if ( ger == 0 ) {
#pragma omp for
for (ip = 0; ip < npoly; ip++) {
if (ger != -1) {
if (lmax >= LMAX_ADVICE) msg(" %d", ip);
accelerate = 0;
/* rectangle */
if (ip < nrect) {
ir = iord[ip];
ipoly = ir_to_ip[ir];
if ( !omp_in_parallel() ) {
poly_to_rect(poly[ipoly], &azmin, &azmax, &elmin, &elmax);
/* does previous rectangle have same elevation limits? */
if (ip > 0) {
Expand All @@ -110,46 +121,69 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l
/* if so, worth accelerating */
if (elmn == elmin && elmx == elmax) accelerate = 1;
}
}
/* accelerated computation */
if (accelerate) {
ier = gsphra(azmin, azmax, elmin, elmax, lmax, dw);
if (ier == -1) return(-1);
if (ier == -1) {
#pragma omp critical(globalError)
ger = -1;
continue;
}
/* standard computation */
} else {
tol = mtol;
ier = gsphr(poly[ipoly], lmax, &tol, dw);
if (ier == -1) return(-1);
if (ier == -1) {
#pragma omp critical(globalError)
ger = -1;
continue;
}
}
/* non-rectangle */
} else {
ipoly = ir_to_ip[ip];
/* zero weight polygon requires no computation */
if (poly[ipoly]->weight == 0.) {
#pragma omp atomic
ndone++;
continue;
} else {
tol = mtol;
ier = gsphr(poly[ipoly], lmax, &tol, dw);
if (ier == -1) return(-1);
if (ier == -1) {
#pragma omp critical(globalError)
ger = -1;
continue;
}
}
}
/* computation failed */
if (ier) {
#pragma omp atomic
ner++;
if (lmax >= LMAX_ADVICE) msg("\n");
fprintf(stderr, "harmonize_polys: computation failed for polygon %d; discard it\n", ipoly);
/* success */
} else {
#pragma omp atomic
naccelerate += accelerate;
#pragma omp atomic
ndone++;
/* increment harmonics of region */
for (iw = 0; iw < NW; iw++) {
for (i = 0; i < IM; i++) {
#pragma omp atomic
w[iw][i] += dw[iw][i] * poly[ipoly]->weight;
}
}
}
}
}
}
free(dw);
}
if (ger == -1) return(-1);
if (lmax >= LMAX_ADVICE) msg("\n");

/* number of computations that were accelerated */
Expand All @@ -161,7 +195,6 @@ int harmonize_polys(int npoly, polygon *poly[/*npoly*/], long double mtol, int l
msg("spherical harmonics of %d weighted polygons accumulated\n", ndone);

/* free work arrays */
free(dw);
free(iord);
free(ir_to_ip);
free(elord);
Expand Down
6 changes: 6 additions & 0 deletions src/ikrand.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ void ikrand_(int *ik, double *ikran)
unsigned long *likran;
unsigned long long *llikran;

#pragma omp critical(ikrand)
{

/* seed random number generator with *ik */
srandom((unsigned int) *ik);

Expand All @@ -31,6 +34,9 @@ void ikrand_(int *ik, double *ikran)
*llikran <<= (8 * sizeof(unsigned int));
*llikran |= (unsigned int)random();
}

}

}

/*------------------------------------------------------------------------------
Expand Down

0 comments on commit 50a2cec

Please sign in to comment.