forked from altMITgcm/MITgcm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
global_sum_tile.F
240 lines (201 loc) · 6.93 KB
/
global_sum_tile.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_tile.F,v 1.5 2015/08/25 20:26:38 jmc Exp $
C $Name: $
#include "CPP_EEOPTIONS.h"
C-- File global_sum_tile.F: Routines that perform global sum
C on a tile array
C Contents
C o GLOBAL_SUM_TILE_RL
C o GLOBAL_SUM_TILE_RS <- not yet coded
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C !ROUTINE: GLOBAL_SUM_TILE_RL
C !INTERFACE:
SUBROUTINE GLOBAL_SUM_TILE_RL(
I phiTile,
O sumPhi,
I myThid )
C !DESCRIPTION:
C *==========================================================*
C | SUBROUTINE GLOBAL\_SUM\_TILE\_RL
C | o Handle sum for _RL data.
C *==========================================================*
C | Apply sum on an array of one value per tile
C | and operate over all tiles & all the processes.
C *==========================================================*
C | Outline of the steps performed below:
C |  1) each thread copies its own tiles of phiTile into the
C |     shared buffer shareBufGSR8, then waits at a barrier;
C |  2) inside the master section, the sum over all tiles is
C |     formed with one of three compile-time variants:
C |     a) GLOBAL\_SUM\_SEND\_RECV (with MPI): process 0
C |        collects the tile values of every process with
C |        point-to-point messages, adds them up in global
C |        tile order and sends the result back;
C |     b) GLOBAL\_SUM\_ORDER\_TILES (with MPI): each process
C |        puts its tile values into a zero-filled array
C |        covering all tiles, the arrays are combined with
C |        MPI\_Allreduce, and every process adds up the
C |        combined array in global tile order;
C |     c) default: local sum over the tiles of this process
C |        followed, if usingMPI, by a scalar MPI\_Allreduce;
C |     variants a) and b) fall back to c) when usingMPI is
C |     false;
C |  3) the result is stored in phiGSR8(1,0) and, after a
C |     second barrier, copied to sumPhi by every thread.
C *==========================================================*
C !USES:
IMPLICIT NONE
C == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"
C !INPUT/OUTPUT PARAMETERS:
C == Routine arguments ==
C phiTile :: Input array with one value per tile
C            (dimensioned nSx by nSy)
C sumPhi  :: Result of sum (same value returned to every thread).
C myThid  :: My thread id.
_RL phiTile(nSx,nSy)
_RL sumPhi
INTEGER myThid
C !LOCAL VARIABLES:
C == Local variables ==
C bi,bj     :: Loop counters (tile indices within this process)
C biG,bjG   :: tile indices within the array that holds the tiles
C              of all processes (only in variants a and b)
C np        :: process counter, from 1 to nPx*nPy (variant a)
C pId       :: MPI rank used as message source/destination (= np-1)
C lbuff     :: number of elements of an MPI message buffer
C idest     :: MPI rank of the collecting process (set to 0)
C itag      :: MPI message tag (set to 0)
C ready_to_receive :: integer token sent by process 0 to tell a
C              process that it may send its data (variant a)
C istatus   :: MPI status array ; ierr :: MPI error code
C localBuf  :: variant a: receive buffer, one value per tile of one
C              process ; variant b: send buffer, one value per tile
C              of all processes
C globalBuf :: one value per tile of all processes
C mpiRC     :: MPI return code
C sumMyPr   :: sum over the tiles of this process
C sumAllP   :: sum over the tiles of all processes
C NOTE(review): the MPI return codes (ierr, mpiRC) are not tested
C               anywhere in this routine.
C- type declaration of: sumMyPr, sumAllP, localBuf and shareBufGSR8 :
C all 4 need to have the same length as MPI_DOUBLE_PRECISION
INTEGER bi,bj
#ifdef ALLOW_USE_MPI
#ifdef GLOBAL_SUM_SEND_RECV
INTEGER biG, bjG, np, pId
INTEGER lbuff, idest, itag, ready_to_receive
INTEGER istatus(MPI_STATUS_SIZE), ierr
Real*8 localBuf (nSx,nSy)
Real*8 globalBuf(nSx*nPx,nSy*nPy)
#elif defined (GLOBAL_SUM_ORDER_TILES)
INTEGER biG, bjG, lbuff
Real*8 localBuf (nSx*nPx,nSy*nPy)
Real*8 globalBuf(nSx*nPx,nSy*nPy)
#endif
INTEGER mpiRC
#endif /* ALLOW_USE_MPI */
Real*8 sumMyPr
Real*8 sumAllP
CEOP
C this barrier is not necessary:
c CALL BAR2( myThid )
C-- write local sum into shared-buffer array
C   (each thread only fills the tiles in its own bi,bj range)
DO bj = myByLo(myThid), myByHi(myThid)
DO bi = myBxLo(myThid), myBxHi(myThid)
shareBufGSR8(bi,bj) = phiTile(bi,bj)
ENDDO
ENDDO
C-- Master thread cannot start until everyone is ready:
CALL BAR2( myThid )
C-- Start of the master section (closed by _END_MASTER below).
C   Each of the three CPP branches that follow opens one IF block;
C   all of them are closed by the single ENDIF located just before
C   the result is written to phiGSR8.
_BEGIN_MASTER( myThid )
#if (defined (GLOBAL_SUM_SEND_RECV) && defined (ALLOW_USE_MPI) )
C-- Variant a: gather on process 0 with point-to-point messages
IF ( usingMPI ) THEN
lbuff = nSx*nSy
idest = 0
itag = 0
ready_to_receive = 0
IF ( mpiMyId.NE.0 ) THEN
C-- All processes except 0 wait to be polled then send local array
C   (the polling message is skipped if DISABLE_MPI_READY_TO_RECEIVE
C    is defined)
#ifndef DISABLE_MPI_READY_TO_RECEIVE
CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
& idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif
CALL MPI_SEND (shareBufGSR8, lbuff, MPI_DOUBLE_PRECISION,
& idest, itag, MPI_COMM_MODEL, ierr)
C-- All processes except 0 receive result from process 0
CALL MPI_RECV (sumAllP, 1, MPI_DOUBLE_PRECISION,
& idest, itag, MPI_COMM_MODEL, istatus, ierr)
ELSE
C- case mpiMyId = 0
C-- Process 0 fills-in its local data
C   (biG,bjG locate each local tile inside globalBuf; they are
C    derived from mpi_myXGlobalLo, mpi_myYGlobalLo and sNx, sNy)
np = 1
DO bj=1,nSy
DO bi=1,nSx
biG = (mpi_myXGlobalLo(np)-1)/sNx+bi
bjG = (mpi_myYGlobalLo(np)-1)/sNy+bj
globalBuf(biG,bjG) = shareBufGSR8(bi,bj)
ENDDO
ENDDO
C-- Process 0 polls and receives data from each process in turn
C   (process number np corresponds to MPI rank pId = np-1)
DO np = 2, nPx*nPy
pId = np - 1
#ifndef DISABLE_MPI_READY_TO_RECEIVE
CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
& pId, itag, MPI_COMM_MODEL, ierr)
#endif
CALL MPI_RECV (localBuf, lbuff, MPI_DOUBLE_PRECISION,
& pId, itag, MPI_COMM_MODEL, istatus, ierr)
C-- Process 0 gathers the local arrays into a global array.
DO bj=1,nSy
DO bi=1,nSx
biG = (mpi_myXGlobalLo(np)-1)/sNx+bi
bjG = (mpi_myYGlobalLo(np)-1)/sNy+bj
globalBuf(biG,bjG) = localBuf(bi,bj)
ENDDO
ENDDO
C- end loop on np
ENDDO
C-- Sum over all tiles:
C   (always in the same biG,bjG order ; presumably intended to make
C    the result independent of how the tiles are distributed over
C    processes - to be confirmed)
sumAllP = 0.
DO bjG = 1,nSy*nPy
DO biG = 1,nSx*nPx
sumAllP = sumAllP + globalBuf(biG,bjG)
ENDDO
ENDDO
C-- Process 0 sends result to all other processes
C   NOTE(review): lbuff is reset to 1 here but is not used again in
C   this routine ; the MPI_SEND below passes a literal count of 1.
lbuff = 1
DO np = 2, nPx*nPy
pId = np - 1
CALL MPI_SEND (sumAllP, 1, MPI_DOUBLE_PRECISION,
& pId, itag, MPI_COMM_MODEL, ierr)
ENDDO
C End if/else mpiMyId = 0
ENDIF
C-- usingMPI is false: continue with the default summation below
ELSE
#elif (defined (GLOBAL_SUM_ORDER_TILES) && defined (ALLOW_USE_MPI) )
C-- Variant b: combine zero-padded tile arrays with MPI_Allreduce
IF ( usingMPI ) THEN
C-- Initialise local buffer
DO bjG=1,nSy*nPy
DO biG=1,nSx*nPx
localBuf(biG,bjG) = 0.
ENDDO
ENDDO
C-- Put my own data in local buffer
C   (entries belonging to the tiles of other processes stay zero)
DO bj=1,nSy
DO bi=1,nSx
biG = (myXGlobalLo-1)/sNx+bi
bjG = (myYGlobalLo-1)/sNy+bj
localBuf(biG,bjG) = shareBufGSR8(bi,bj)
ENDDO
ENDDO
C-- Collect data from all procs
C   (element-wise MPI_SUM of the local buffers of all processes)
lbuff = nSx*nPx*nSy*nPy
CALL MPI_Allreduce( localBuf, globalBuf, lbuff,
& MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MODEL, mpiRC )
C-- Sum over all tiles:
C   (done by every process, always in the same biG,bjG order)
sumAllP = 0.
DO bjG = 1,nSy*nPy
DO biG = 1,nSx*nPx
sumAllP = sumAllP + globalBuf(biG,bjG)
ENDDO
ENDDO
C-- usingMPI is false: continue with the default summation below
ELSE
#else /* not ((GLOBAL_SUM_SEND_RECV | GLOBAL_SUM_ORDER_TILES) & ALLOW_USE_MPI) */
C-- Variant c (default): this always-true IF only serves to open a
C   block, so that the ENDIF shared with variants a and b is matched
IF ( .TRUE. ) THEN
#endif /* not ((GLOBAL_SUM_SEND_RECV | GLOBAL_SUM_ORDER_TILES) & ALLOW_USE_MPI) */
C-- Sum over all tiles (of the same process) first
sumMyPr = 0.
DO bj = 1,nSy
DO bi = 1,nSx
sumMyPr = sumMyPr + shareBufGSR8(bi,bj)
ENDDO
ENDDO
C in case MPI is not used:
sumAllP = sumMyPr
#ifdef ALLOW_USE_MPI
C-- then add up the per-process sums across all processes
IF ( usingMPI ) THEN
CALL MPI_Allreduce(sumMyPr,sumAllP,1,MPI_DOUBLE_PRECISION,
& MPI_SUM,MPI_COMM_MODEL,mpiRC)
ENDIF
#endif /* ALLOW_USE_MPI */
C-- closes the IF block opened in whichever CPP branch was compiled
ENDIF
C-- Write solution to shared buffer (all threads can see it)
C   (phiGSR8(1,0) is used for the output instead of shareBufGSR8(1,1))
c shareBufGSR8(1,1) = sumAllP
phiGSR8(1,0) = sumAllP
_END_MASTER( myThid )
C-- Everyone wait for Master thread to be ready
CALL BAR2( myThid )
C-- set result for every thread
c sumPhi = shareBufGSR8(1,1)
sumPhi = phiGSR8(1,0)
C-- A barrier was needed here to prevent thread 1 from modifying
C shareBufGSR8(1,1) (as it would in the following call to this S/R)
C before all threads get their global-sum result out.
C No longer needed since a dedicated shared var. is used to share the output
c CALL BAR2( myThid )
RETURN
END