forked from altMITgcm/MITgcm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
global_sum_singlecpu.F
148 lines (126 loc) · 4.17 KB
/
global_sum_singlecpu.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_singlecpu.F,v 1.5 2012/09/03 19:36:29 jmc Exp $
C $Name: $
#include "PACKAGES_CONFIG.h"
#include "CPP_EEOPTIONS.h"
C-- File global_sum_singlecpu.F: Routines that perform global sum
C on a single CPU
C Contents
C o GLOBAL_SUM_SINGLECPU_RL
C o GLOBAL_SUM_SINGLECPU_RS <- not yet coded
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C !ROUTINE: GLOBAL_SUM_SINGLECPU_RL
C !INTERFACE:
SUBROUTINE GLOBAL_SUM_SINGLECPU_RL(
I phiLocal,
O sumPhi,
I oLi, oLj, myThid )
IMPLICIT NONE
C !DESCRIPTION:
C *==========================================================*
C | SUBROUTINE GLOBAL\_SUM\_SINGLECPU\_RL
C | o Handle sum for _RL data.
C *==========================================================*
C | Global sum of 2d array
C | independent of tiling as sum is performed on a single CPU
C | sum is performed in REAL*8
C *==========================================================*
C !USES:
C == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"
#ifdef ALLOW_EXCH2
#include "W2_EXCH2_SIZE.h"
#include "W2_EXCH2_TOPOLOGY.h"
#endif
#include "EEBUFF_SCPU.h"
C !INPUT/OUTPUT PARAMETERS:
C == Routine arguments ==
C phiLocal :: local input array without overlap regions.
C sumPhi :: Result of sum.
C oLi, oLj :: overlap size of input array in I & J direction.
C myThid :: My thread id.
INTEGER oLi, oLj
_RL phiLocal(1-oLi:sNx+oLi,1-oLj:sNy+oLj,nSx,nSy)
_RL sumPhi
INTEGER myThid
C !LOCAL VARIABLES:
C == Local variables ==
C- type declaration of: sumAll, globalBuf :
C sumAll needs to have the same length as MPI_DOUBLE_PRECISION
LOGICAL useExch2GlobLayOut, zeroBuff
INTEGER xSize, ySize
INTEGER i, j, ij
INTEGER bi, bj
Real*8 sumAll
#ifdef ALLOW_USE_MPI
INTEGER pId, idest, itag
INTEGER istatus(MPI_STATUS_SIZE), ierr
#endif /* ALLOW_USE_MPI */
CEOP
#ifdef ALLOW_EXCH2
zeroBuff = .TRUE.
useExch2GlobLayOut = .TRUE.
xSize = exch2_global_Nx
ySize = exch2_global_Ny
#else /* ALLOW_EXCH2 */
zeroBuff = .FALSE.
useExch2GlobLayOut = .FALSE.
xSize = Nx
ySize = Ny
#endif /* ALLOW_EXCH2 */
#ifdef ALLOW_USE_MPI
idest = 0
itag = 0
#endif
C-- copy (and conversion to real*8) to Shared buffer:
DO bj = myByLo(myThid), myByHi(myThid)
DO bi = myBxLo(myThid), myBxHi(myThid)
DO j=1,sNy
DO i=1,sNx
sharedLocBuf_r8(i,j,bi,bj) = phiLocal(i,j,bi,bj)
ENDDO
ENDDO
ENDDO
ENDDO
C-- Master thread does the communications and the global sum
C-- Master thread cannot start until everyone is ready:
CALL BAR2( myThid )
_BEGIN_MASTER( myThid )
C-- Gather local arrays
CALL GATHER_2D_R8(
O xy_buffer_r8,
I sharedLocBuf_r8,
I xSize, ySize,
I useExch2GlobLayOut, zeroBuff, myThid )
IF ( myProcId.EQ.0 ) THEN
C-- Process 0 sums the global array
sumAll = 0. _d 0
DO ij=1,xSize*ySize
sumAll = sumAll + xy_buffer_r8(ij)
ENDDO
#ifdef ALLOW_USE_MPI
C-- Process 0 sends result to all other processes
IF ( usingMPI ) THEN
DO pId = 1, (nPx*nPy)-1
CALL MPI_SEND (sumAll, 1, MPI_DOUBLE_PRECISION,
& pId, itag, MPI_COMM_MODEL, ierr)
ENDDO
ENDIF
ELSEIF ( usingMPI ) THEN
C-- All proceses except 0 receive result from process 0
CALL MPI_RECV (sumAll, 1, MPI_DOUBLE_PRECISION,
& idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif /* ALLOW_USE_MPI */
ENDIF
C-- Write solution to shared buffer (all threads can see it)
phiGSR8(1,0) = sumAll
_END_MASTER( myThid )
C-- Everyone wait for Master thread to be ready
CALL BAR2( myThid )
C-- set result for every threads
sumPhi = phiGSR8(1,0)
RETURN
END